sqlglot.parser
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)

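# Illustrative sketch (not part of the original module): how build_logarithm
# resolves its argument order. The dialect name is an assumption for
# demonstration; the assertion holds for any dialect since a two-argument
# call always produces a Log node.
def _example_build_logarithm() -> None:  # pragma: no cover - illustrative only
    from sqlglot.dialects import Dialect

    dialect = Dialect.get_or_raise("postgres")
    # Two arguments: the canonical order is LOG(base, expression); dialects
    # with LOG_BASE_FIRST = False have their operands swapped into that order.
    node = build_logarithm([exp.Literal.number(2), exp.Literal.number(8)], dialect)
    assert isinstance(node, exp.Log)
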
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass

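# Illustrative sketch (not part of the original module): the metaclass above
# precomputes word tries so that multi-word keywords such as "SHOW TABLES" can
# be matched incrementally. new_trie/in_trie/TrieResult are the real helpers
# imported at the top; the keys below are hypothetical.
def _example_show_trie() -> None:  # pragma: no cover - illustrative only
    trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))
    result, _ = in_trie(trie, ["SHOW"])
    assert result == TrieResult.PREFIX  # "SHOW" begins a longer registered key
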
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

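    # Illustrative sketch (not part of the original class): FUNCTIONS maps an
    # upper-cased name to a builder that receives the parsed argument list (and
    # the dialect, for two-parameter builders). The query is arbitrary.
    @staticmethod
    def _example_function_builder() -> None:  # pragma: no cover - illustrative only
        import sqlglot

        projection = sqlglot.parse_one("SELECT IFNULL(a, 0)").expressions[0]
        # IFNULL is routed through build_coalesce, so it parses as Coalesce.
        assert isinstance(projection, exp.Coalesce)
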
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

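    # Illustrative sketch (not part of the original class): NO_PAREN_FUNCTIONS
    # lets bare keywords parse as function nodes without an argument list.
    @staticmethod
    def _example_no_paren_function() -> None:  # pragma: no cover - illustrative only
        import sqlglot

        projection = sqlglot.parse_one("SELECT CURRENT_DATE").expressions[0]
        assert isinstance(projection, exp.CurrentDate)
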
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.SESSION,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.LOCK,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *ALTERABLES,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

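    # Illustrative sketch (not part of the original class): because OFFSET is in
    # ID_VAR_TOKENS (and hence ALIAS_TOKENS), the keyword can still serve as an
    # alias.
    @staticmethod
    def _example_keyword_as_identifier() -> None:  # pragma: no cover - illustrative only
        import sqlglot

        select = sqlglot.parse_one("SELECT 1 AS offset")
        assert select.expressions[0].alias == "offset"
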
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.UTC_DATE,
        TokenType.UTC_TIME,
        TokenType.UTC_TIMESTAMP,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

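    # Illustrative sketch (not part of the original class): the operator tables
    # above (CONJUNCTION, DISJUNCTION, TERM, FACTOR, ...) drive precedence
    # climbing, so AND binds tighter than OR.
    @staticmethod
    def _example_precedence() -> None:  # pragma: no cover - illustrative only
        import sqlglot

        condition = sqlglot.parse_one("SELECT x OR y AND z").expressions[0]
        assert isinstance(condition, exp.Or)
        assert isinstance(condition.expression, exp.And)
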
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.GrantPrincipal: lambda self: self._parse_grant_principal(),
        exp.GrantPrivilege: lambda self: self._parse_grant_privilege(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

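    # Illustrative sketch (not part of the original class): COLUMN_OPERATORS
    # attaches postfix operators to a parsed column, e.g. `::` becomes a cast.
    @staticmethod
    def _example_column_operator() -> None:  # pragma: no cover - illustrative only
        import sqlglot

        projection = sqlglot.parse_one("SELECT a::INT").expressions[0]
        assert isinstance(projection, exp.Cast)
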
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

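    # Illustrative sketch (not part of the original class): statement dispatch
    # keys off the first token, so a leading UPDATE routes to _parse_update.
    @staticmethod
    def _example_statement_dispatch() -> None:  # pragma: no cover - illustrative only
        import sqlglot

        assert isinstance(sqlglot.parse_one("UPDATE t SET x = 1"), exp.Update)
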
    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

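    # Illustrative sketch (not part of the original class): RANGE_PARSERS
    # handles postfix predicates such as BETWEEN, IN and LIKE.
    @staticmethod
    def _example_range_parser() -> None:  # pragma: no cover - illustrative only
        import sqlglot

        query = sqlglot.parse_one("SELECT * FROM t WHERE a BETWEEN 1 AND 2")
        assert query.find(exp.Between) is not None
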
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

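    # Illustrative sketch (not part of the original class): PROPERTY_PARSERS
    # keys off an upper-cased keyword when parsing DDL properties. The dialect
    # choice here is an assumption for demonstration.
    @staticmethod
    def _example_property_parser() -> None:  # pragma: no cover - illustrative only
        import sqlglot

        create = sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
        assert create.find(exp.EngineProperty) is not None
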
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

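    # Illustrative sketch (not part of the original class): CONSTRAINT_PARSERS
    # drives column-constraint parsing, e.g. DEFAULT <expr>.
    @staticmethod
    def _example_constraint_parser() -> None:  # pragma: no cover - illustrative only
        import sqlglot

        create = sqlglot.parse_one("CREATE TABLE t (x INT DEFAULT 0)")
        assert create.find(exp.DefaultColumnConstraint) is not None
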
    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If the keyword is not followed by parentheses, parse it as an identifier instead
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized as the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

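    # Illustrative sketch (not part of the original class): a Hive-style
    # transform with the literal first gets canonicalized by the method above.
    # The exact DDL accepted can vary by dialect; this assumes the base parser.
    @staticmethod
    def _example_bucket_transform() -> None:  # pragma: no cover - illustrative only
        import sqlglot

        ddl = "CREATE TABLE t (a INT) PARTITIONED BY (bucket(16, a))"
        create = sqlglot.parse_one(ddl)
        assert create.find(exp.PartitionedByBucket) is not None
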
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

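    # Illustrative sketch (not part of the original class):
    # NO_PAREN_FUNCTION_PARSERS covers keyword-style "functions" such as CASE,
    # which take no parenthesized argument list.
    @staticmethod
    def _example_case_parsing() -> None:  # pragma: no cover - illustrative only
        import sqlglot

        projection = sqlglot.parse_one("SELECT CASE WHEN a THEN 1 ELSE 0 END").expressions[0]
        assert isinstance(projection, exp.Case)
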
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1250 "GAP_FILL": lambda self: self._parse_gap_fill(), 1251 "JSON_OBJECT": lambda self: self._parse_json_object(), 1252 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1253 "JSON_TABLE": lambda self: self._parse_json_table(), 1254 "MATCH": lambda self: self._parse_match_against(), 1255 "NORMALIZE": lambda self: self._parse_normalize(), 1256 "OPENJSON": lambda self: self._parse_open_json(), 1257 "OVERLAY": lambda self: self._parse_overlay(), 1258 "POSITION": lambda self: self._parse_position(), 1259 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1260 "STRING_AGG": lambda self: self._parse_string_agg(), 1261 "SUBSTRING": lambda self: self._parse_substring(), 1262 "TRIM": lambda self: self._parse_trim(), 1263 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1264 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1265 "XMLELEMENT": lambda self: self.expression( 1266 exp.XMLElement, 1267 this=self._match_text_seq("NAME") and self._parse_id_var(), 1268 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1269 ), 1270 "XMLTABLE": lambda self: self._parse_xml_table(), 1271 } 1272 1273 QUERY_MODIFIER_PARSERS = { 1274 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1275 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1276 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1277 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1278 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1279 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1280 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1281 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1282 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1283 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1284 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1285 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1286 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1287 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1288 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1289 TokenType.CLUSTER_BY: lambda self: ( 1290 "cluster", 1291 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1292 ), 1293 TokenType.DISTRIBUTE_BY: lambda self: ( 1294 "distribute", 1295 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1296 ), 1297 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1298 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1299 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1300 } 1301 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1302 1303 SET_PARSERS = { 1304 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1305 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1306 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1307 "TRANSACTION": lambda self: self._parse_set_transaction(), 1308 } 1309 1310 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1311 1312 TYPE_LITERAL_PARSERS = { 1313 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1314 } 1315 1316 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

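    # Illustrative sketch (not part of the original class): OPTIONS_TYPE values
    # list the token sequences that may follow a leading keyword, e.g.
    # ISOLATION -> ("LEVEL", "SERIALIZABLE") above.
    @staticmethod
    def _example_transaction_options() -> None:  # pragma: no cover - illustrative only
        import sqlglot

        node = sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL SERIALIZABLE")
        assert isinstance(node, exp.Set)
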
    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

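    # Illustrative sketch (not part of the original class): dialects customize
    # behavior by subclassing and flipping these class-level flags. The
    # subclass below is hypothetical.
    @staticmethod
    def _example_flag_override() -> None:  # pragma: no cover - illustrative only
        class _MyParser(Parser):
            STRING_ALIASES = True  # allow aliases like SELECT 1 'x'

        assert _MyParser.STRING_ALIASES
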
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINs without join criteria.
    # Adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

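    # Illustrative sketch (not part of the original class): the typical
    # tokenize-then-parse flow; the SQL text is arbitrary.
    @staticmethod
    def _example_parse_flow() -> None:  # pragma: no cover - illustrative only
        sql = "SELECT a FROM t"
        tokens = Tokenizer().tokenize(sql)
        expressions = Parser().parse(tokens, sql)
        assert isinstance(expressions[0], exp.Select)
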
1625 sql: The original SQL string, used to produce helpful debug messages. 1626 1627 Returns: 1628 The list of the produced syntax trees. 1629 """ 1630 return self._parse( 1631 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1632 ) 1633 1634 def parse_into( 1635 self, 1636 expression_types: exp.IntoType, 1637 raw_tokens: t.List[Token], 1638 sql: t.Optional[str] = None, 1639 ) -> t.List[t.Optional[exp.Expression]]: 1640 """ 1641 Parses a list of tokens into a given Expression type. If a collection of Expression 1642 types is given instead, this method will try to parse the token list into each one 1643 of them, stopping at the first for which the parsing succeeds. 1644 1645 Args: 1646 expression_types: The expression type(s) to try and parse the token list into. 1647 raw_tokens: The list of tokens. 1648 sql: The original SQL string, used to produce helpful debug messages. 1649 1650 Returns: 1651 The target Expression. 1652 """ 1653 errors = [] 1654 for expression_type in ensure_list(expression_types): 1655 parser = self.EXPRESSION_PARSERS.get(expression_type) 1656 if not parser: 1657 raise TypeError(f"No parser registered for {expression_type}") 1658 1659 try: 1660 return self._parse(parser, raw_tokens, sql) 1661 except ParseError as e: 1662 e.errors[0]["into_expression"] = expression_type 1663 errors.append(e) 1664 1665 raise ParseError( 1666 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1667 errors=merge_errors(errors), 1668 ) from errors[-1] 1669 1670 def _parse( 1671 self, 1672 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1673 raw_tokens: t.List[Token], 1674 sql: t.Optional[str] = None, 1675 ) -> t.List[t.Optional[exp.Expression]]: 1676 self.reset() 1677 self.sql = sql or "" 1678 1679 total = len(raw_tokens) 1680 chunks: t.List[t.List[Token]] = [[]] 1681 1682 for i, token in enumerate(raw_tokens): 1683 if token.token_type == TokenType.SEMICOLON: 1684 if token.comments: 1685 chunks.append([token]) 1686 1687 if i < total - 1: 1688 chunks.append([]) 1689 else: 1690 chunks[-1].append(token) 1691 1692 expressions = [] 1693 1694 for tokens in chunks: 1695 self._index = -1 1696 self._tokens = tokens 1697 self._advance() 1698 1699 expressions.append(parse_method(self)) 1700 1701 if self._index < len(self._tokens): 1702 self.raise_error("Invalid expression / Unexpected token") 1703 1704 self.check_errors() 1705 1706 return expressions 1707 1708 def check_errors(self) -> None: 1709 """Logs or raises any found errors, depending on the chosen error level setting.""" 1710 if self.error_level == ErrorLevel.WARN: 1711 for error in self.errors: 1712 logger.error(str(error)) 1713 elif self.error_level == ErrorLevel.RAISE and self.errors: 1714 raise ParseError( 1715 concat_messages(self.errors, self.max_errors), 1716 errors=merge_errors(self.errors), 1717 ) 1718 1719 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1720 """ 1721 Appends an error in the list of recorded errors or raises it, depending on the chosen 1722 error level setting. 1723 """ 1724 token = token or self._curr or self._prev or Token.string("") 1725 start = token.start 1726 end = token.end + 1 1727 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1728 highlight = self.sql[start:end] 1729 end_context = self.sql[end : end + self.error_message_context] 1730 1731 error = ParseError.new( 1732 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1733 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1734 description=message, 1735 line=token.line, 1736 col=token.col, 1737 start_context=start_context, 1738 highlight=highlight, 1739 end_context=end_context, 1740 ) 1741 1742 if self.error_level == ErrorLevel.IMMEDIATE: 1743 raise error 1744 1745 self.errors.append(error) 1746 1747 def expression( 1748 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1749 ) -> E: 1750 """ 1751 Creates a new, validated Expression. 1752 1753 Args: 1754 exp_class: The expression class to instantiate. 1755 comments: An optional list of comments to attach to the expression. 1756 kwargs: The arguments to set for the expression along with their respective values. 1757 1758 Returns: 1759 The target expression. 1760 """ 1761 instance = exp_class(**kwargs) 1762 instance.add_comments(comments) if comments else self._add_comments(instance) 1763 return self.validate_expression(instance) 1764 1765 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1766 if expression and self._prev_comments: 1767 expression.add_comments(self._prev_comments) 1768 self._prev_comments = None 1769 1770 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1771 """ 1772 Validates an Expression, making sure that all its mandatory arguments are set. 1773 1774 Args: 1775 expression: The expression to validate. 1776 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1777 1778 Returns: 1779 The validated expression. 1780 """ 1781 if self.error_level != ErrorLevel.IGNORE: 1782 for error_message in expression.error_messages(args): 1783 self.raise_error(error_message) 1784 1785 return expression 1786 1787 def _find_sql(self, start: Token, end: Token) -> str: 1788 return self.sql[start.start : end.end + 1] 1789 1790 def _is_connected(self) -> bool: 1791 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1792 1793 def _advance(self, times: int = 1) -> None: 1794 self._index += times 1795 self._curr = seq_get(self._tokens, self._index) 1796 self._next = seq_get(self._tokens, self._index + 1) 1797 1798 if self._index > 0: 1799 self._prev = self._tokens[self._index - 1] 1800 self._prev_comments = self._prev.comments 1801 else: 1802 self._prev = None 1803 self._prev_comments = None 1804 1805 def _retreat(self, index: int) -> None: 1806 if index != self._index: 1807 self._advance(index - self._index) 1808 1809 def _warn_unsupported(self) -> None: 1810 if len(self._tokens) <= 1: 1811 return 1812 1813 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1814 # interested in emitting a warning for the one being currently processed. 1815 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1816 1817 logger.warning( 1818 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1819 ) 1820 1821 def _parse_command(self) -> exp.Command: 1822 self._warn_unsupported() 1823 return self.expression( 1824 exp.Command, 1825 comments=self._prev_comments, 1826 this=self._prev.text.upper(), 1827 expression=self._parse_string(), 1828 ) 1829 1830 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1831 """ 1832 Attempts to backtrack if a parse function that contains a try/except internally raises an error.
1833 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1834 solve this by setting & resetting the parser state accordingly. 1835 """ 1836 index = self._index 1837 error_level = self.error_level 1838 1839 self.error_level = ErrorLevel.IMMEDIATE 1840 try: 1841 this = parse_method() 1842 except ParseError: 1843 this = None 1844 finally: 1845 if not this or retreat: 1846 self._retreat(index) 1847 self.error_level = error_level 1848 1849 return this 1850 1851 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1852 start = self._prev 1853 exists = self._parse_exists() if allow_exists else None 1854 1855 self._match(TokenType.ON) 1856 1857 materialized = self._match_text_seq("MATERIALIZED") 1858 kind = self._match_set(self.CREATABLES) and self._prev 1859 if not kind: 1860 return self._parse_as_command(start) 1861 1862 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1863 this = self._parse_user_defined_function(kind=kind.token_type) 1864 elif kind.token_type == TokenType.TABLE: 1865 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1866 elif kind.token_type == TokenType.COLUMN: 1867 this = self._parse_column() 1868 else: 1869 this = self._parse_id_var() 1870 1871 self._match(TokenType.IS) 1872 1873 return self.expression( 1874 exp.Comment, 1875 this=this, 1876 kind=kind.text, 1877 expression=self._parse_string(), 1878 exists=exists, 1879 materialized=materialized, 1880 ) 1881 1882 def _parse_to_table( 1883 self, 1884 ) -> exp.ToTableProperty: 1885 table = self._parse_table_parts(schema=True) 1886 return self.expression(exp.ToTableProperty, this=table) 1887 1888 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1889 def _parse_ttl(self) -> exp.Expression: 1890 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1891 this = self._parse_bitwise() 1892 1893 if self._match_text_seq("DELETE"): 1894 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1895 if self._match_text_seq("RECOMPRESS"): 1896 return self.expression( 1897 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1898 ) 1899 if self._match_text_seq("TO", "DISK"): 1900 return self.expression( 1901 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1902 ) 1903 if self._match_text_seq("TO", "VOLUME"): 1904 return self.expression( 1905 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1906 ) 1907 1908 return this 1909 1910 expressions = self._parse_csv(_parse_ttl_action) 1911 where = self._parse_where() 1912 group = self._parse_group() 1913 1914 aggregates = None 1915 if group and self._match(TokenType.SET): 1916 aggregates = self._parse_csv(self._parse_set_item) 1917 1918 return self.expression( 1919 exp.MergeTreeTTL, 1920 expressions=expressions, 1921 where=where, 1922 group=group, 1923 aggregates=aggregates, 1924 ) 1925 1926 def _parse_statement(self) -> t.Optional[exp.Expression]: 1927 if self._curr is None: 1928 return None 1929 1930 if self._match_set(self.STATEMENT_PARSERS): 1931 comments = self._prev_comments 1932 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1933 stmt.add_comments(comments, prepend=True) 1934 return stmt 1935 1936 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1937 return self._parse_command() 1938 1939 expression = self._parse_expression() 1940 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1941 return
self._parse_query_modifiers(expression) 1942 1943 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1944 start = self._prev 1945 temporary = self._match(TokenType.TEMPORARY) 1946 materialized = self._match_text_seq("MATERIALIZED") 1947 1948 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1949 if not kind: 1950 return self._parse_as_command(start) 1951 1952 concurrently = self._match_text_seq("CONCURRENTLY") 1953 if_exists = exists or self._parse_exists() 1954 1955 if kind == "COLUMN": 1956 this = self._parse_column() 1957 else: 1958 this = self._parse_table_parts( 1959 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1960 ) 1961 1962 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1963 1964 if self._match(TokenType.L_PAREN, advance=False): 1965 expressions = self._parse_wrapped_csv(self._parse_types) 1966 else: 1967 expressions = None 1968 1969 return self.expression( 1970 exp.Drop, 1971 exists=if_exists, 1972 this=this, 1973 expressions=expressions, 1974 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1975 temporary=temporary, 1976 materialized=materialized, 1977 cascade=self._match_text_seq("CASCADE"), 1978 constraints=self._match_text_seq("CONSTRAINTS"), 1979 purge=self._match_text_seq("PURGE"), 1980 cluster=cluster, 1981 concurrently=concurrently, 1982 ) 1983 1984 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1985 return ( 1986 self._match_text_seq("IF") 1987 and (not not_ or self._match(TokenType.NOT)) 1988 and self._match(TokenType.EXISTS) 1989 ) 1990 1991 def _parse_create(self) -> exp.Create | exp.Command: 1992 # Note: this can't be None because we've matched a statement parser 1993 start = self._prev 1994 1995 replace = ( 1996 start.token_type == TokenType.REPLACE 1997 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1998 or self._match_pair(TokenType.OR, TokenType.ALTER) 1999 ) 2000 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 2001 2002 unique = self._match(TokenType.UNIQUE) 2003 2004 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2005 clustered = True 2006 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2007 "COLUMNSTORE" 2008 ): 2009 clustered = False 2010 else: 2011 clustered = None 2012 2013 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2014 self._advance() 2015 2016 properties = None 2017 create_token = self._match_set(self.CREATABLES) and self._prev 2018 2019 if not create_token: 2020 # exp.Properties.Location.POST_CREATE 2021 properties = self._parse_properties() 2022 create_token = self._match_set(self.CREATABLES) and self._prev 2023 2024 if not properties or not create_token: 2025 return self._parse_as_command(start) 2026 2027 concurrently = self._match_text_seq("CONCURRENTLY") 2028 exists = self._parse_exists(not_=True) 2029 this = None 2030 expression: t.Optional[exp.Expression] = None 2031 indexes = None 2032 no_schema_binding = None 2033 begin = None 2034 end = None 2035 clone = None 2036 2037 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2038 nonlocal properties 2039 if properties and temp_props: 2040 properties.expressions.extend(temp_props.expressions) 2041 elif temp_props: 2042 properties = temp_props 2043 2044 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2045 this = self._parse_user_defined_function(kind=create_token.token_type) 2046 2047 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2048 extend_props(self._parse_properties()) 2049 2050 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2051 extend_props(self._parse_properties()) 2052 2053 if not expression: 2054 if self._match(TokenType.COMMAND): 2055 expression = self._parse_as_command(self._prev) 2056 else: 2057 begin = self._match(TokenType.BEGIN) 2058 return_ = self._match_text_seq("RETURN") 2059 2060 if self._match(TokenType.STRING, advance=False): 2061 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2062 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2063 expression = self._parse_string() 2064 extend_props(self._parse_properties()) 2065 else: 2066 expression = self._parse_user_defined_function_expression() 2067 2068 end = self._match_text_seq("END") 2069 2070 if return_: 2071 expression = self.expression(exp.Return, this=expression) 2072 elif create_token.token_type == TokenType.INDEX: 2073 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2074 if not self._match(TokenType.ON): 2075 index = self._parse_id_var() 2076 anonymous = False 2077 else: 2078 index = None 2079 anonymous = True 2080 2081 this = self._parse_index(index=index, anonymous=anonymous) 2082 elif create_token.token_type in self.DB_CREATABLES: 2083 table_parts = self._parse_table_parts( 2084 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2085 ) 2086 2087 # exp.Properties.Location.POST_NAME 2088 self._match(TokenType.COMMA) 2089 extend_props(self._parse_properties(before=True)) 2090 2091 this = self._parse_schema(this=table_parts) 2092 2093 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2094 extend_props(self._parse_properties()) 2095 2096 has_alias = self._match(TokenType.ALIAS) 2097 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2098 # exp.Properties.Location.POST_ALIAS 2099 extend_props(self._parse_properties()) 2100 2101 if create_token.token_type == TokenType.SEQUENCE: 2102 expression = self._parse_types() 2103 props = self._parse_properties() 2104 if props: 2105 sequence_props = exp.SequenceProperties() 2106 options = [] 2107 for prop in props: 2108 if isinstance(prop, exp.SequenceProperties): 2109 for arg, value in prop.args.items(): 2110 if arg == "options": 2111 options.extend(value) 2112 else: 2113 sequence_props.set(arg, value) 2114 prop.pop() 2115 2116 if options: 2117 sequence_props.set("options", options) 2118 2119 props.append("expressions", sequence_props) 2120 extend_props(props) 2121 else: 2122 expression = self._parse_ddl_select() 2123 2124 # Some dialects also support using a table as an alias instead of a SELECT. 2125 # Here we fallback to this as an alternative. 
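# The fallback below leans on _try_parse (defined earlier): it records the token
# index, parses with ErrorLevel.IMMEDIATE so any failure raises at once, and rewinds
# on failure. A minimal sketch of that backtracking pattern, assuming `p` is a
# Parser instance (illustrative only, not part of the source):
#
#     index = p._index
#     try:
#         node = p._parse_table_parts()
#     except ParseError:
#         p._retreat(index)  # rewind so another rule can retry the same tokens
#         node = None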
2126 if not expression and has_alias: 2127 expression = self._try_parse(self._parse_table_parts) 2128 2129 if create_token.token_type == TokenType.TABLE: 2130 # exp.Properties.Location.POST_EXPRESSION 2131 extend_props(self._parse_properties()) 2132 2133 indexes = [] 2134 while True: 2135 index = self._parse_index() 2136 2137 # exp.Properties.Location.POST_INDEX 2138 extend_props(self._parse_properties()) 2139 if not index: 2140 break 2141 else: 2142 self._match(TokenType.COMMA) 2143 indexes.append(index) 2144 elif create_token.token_type == TokenType.VIEW: 2145 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2146 no_schema_binding = True 2147 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2148 extend_props(self._parse_properties()) 2149 2150 shallow = self._match_text_seq("SHALLOW") 2151 2152 if self._match_texts(self.CLONE_KEYWORDS): 2153 copy = self._prev.text.lower() == "copy" 2154 clone = self.expression( 2155 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2156 ) 2157 2158 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2159 return self._parse_as_command(start) 2160 2161 create_kind_text = create_token.text.upper() 2162 return self.expression( 2163 exp.Create, 2164 this=this, 2165 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2166 replace=replace, 2167 refresh=refresh, 2168 unique=unique, 2169 expression=expression, 2170 exists=exists, 2171 properties=properties, 2172 indexes=indexes, 2173 no_schema_binding=no_schema_binding, 2174 begin=begin, 2175 end=end, 2176 clone=clone, 2177 concurrently=concurrently, 2178 clustered=clustered, 2179 ) 2180 2181 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2182 seq = exp.SequenceProperties() 2183 2184 options = [] 2185 index = self._index 2186 2187 while self._curr: 2188 self._match(TokenType.COMMA) 2189 if self._match_text_seq("INCREMENT"): 2190 self._match_text_seq("BY") 2191 self._match_text_seq("=") 2192 seq.set("increment", self._parse_term()) 2193 elif self._match_text_seq("MINVALUE"): 2194 seq.set("minvalue", self._parse_term()) 2195 elif self._match_text_seq("MAXVALUE"): 2196 seq.set("maxvalue", self._parse_term()) 2197 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2198 self._match_text_seq("=") 2199 seq.set("start", self._parse_term()) 2200 elif self._match_text_seq("CACHE"): 2201 # T-SQL allows empty CACHE which is initialized dynamically 2202 seq.set("cache", self._parse_number() or True) 2203 elif self._match_text_seq("OWNED", "BY"): 2204 # "OWNED BY NONE" is the default 2205 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2206 else: 2207 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2208 if opt: 2209 options.append(opt) 2210 else: 2211 break 2212 2213 seq.set("options", options if options else None) 2214 return None if self._index == index else seq 2215 2216 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2217 # only used for teradata currently 2218 self._match(TokenType.COMMA) 2219 2220 kwargs = { 2221 "no": self._match_text_seq("NO"), 2222 "dual": self._match_text_seq("DUAL"), 2223 "before": self._match_text_seq("BEFORE"), 2224 "default": self._match_text_seq("DEFAULT"), 2225 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2226 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2227 "after": self._match_text_seq("AFTER"), 2228 "minimum": 
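# The MIN/MINIMUM and MAX/MAXIMUM variants below are folded into boolean flags;
# together with the other Teradata modifiers above, any truthy flags are then
# forwarded as keyword arguments to the matched PROPERTY_PARSERS entry.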
self._match_texts(("MIN", "MINIMUM")), 2229 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2230 } 2231 2232 if self._match_texts(self.PROPERTY_PARSERS): 2233 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2234 try: 2235 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2236 except TypeError: 2237 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2238 2239 return None 2240 2241 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2242 return self._parse_wrapped_csv(self._parse_property) 2243 2244 def _parse_property(self) -> t.Optional[exp.Expression]: 2245 if self._match_texts(self.PROPERTY_PARSERS): 2246 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2247 2248 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2249 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2250 2251 if self._match_text_seq("COMPOUND", "SORTKEY"): 2252 return self._parse_sortkey(compound=True) 2253 2254 if self._match_text_seq("SQL", "SECURITY"): 2255 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2256 2257 index = self._index 2258 2259 seq_props = self._parse_sequence_properties() 2260 if seq_props: 2261 return seq_props 2262 2263 self._retreat(index) 2264 key = self._parse_column() 2265 2266 if not self._match(TokenType.EQ): 2267 self._retreat(index) 2268 return None 2269 2270 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2271 if isinstance(key, exp.Column): 2272 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2273 2274 value = self._parse_bitwise() or self._parse_var(any_token=True) 2275 2276 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2277 if isinstance(value, exp.Column): 2278 value = exp.var(value.name) 2279 2280 return self.expression(exp.Property, this=key, value=value) 2281 2282 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2283 if self._match_text_seq("BY"): 2284 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2285 2286 self._match(TokenType.ALIAS) 2287 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2288 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2289 2290 return self.expression( 2291 exp.FileFormatProperty, 2292 this=( 2293 self.expression( 2294 exp.InputOutputFormat, 2295 input_format=input_format, 2296 output_format=output_format, 2297 ) 2298 if input_format or output_format 2299 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2300 ), 2301 hive_format=True, 2302 ) 2303 2304 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2305 field = self._parse_field() 2306 if isinstance(field, exp.Identifier) and not field.quoted: 2307 field = exp.var(field) 2308 2309 return field 2310 2311 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2312 self._match(TokenType.EQ) 2313 self._match(TokenType.ALIAS) 2314 2315 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2316 2317 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2318 properties = [] 2319 while True: 2320 if before: 2321 prop = self._parse_property_before() 2322 else: 2323 prop = self._parse_property() 2324 if not prop: 2325 break 2326 for p in ensure_list(prop): 2327 properties.append(p) 
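# For example, a generic `key = value` property such as `location = 's3://bucket'`
# reaches the key/value fallback of _parse_property above and yields, roughly
# (illustrative shape):
#
#     exp.Property(this=exp.var("location"), value=exp.Literal.string("s3://bucket"))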
2328 2329 if properties: 2330 return self.expression(exp.Properties, expressions=properties) 2331 2332 return None 2333 2334 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2335 return self.expression( 2336 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2337 ) 2338 2339 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2340 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2341 security_specifier = self._prev.text.upper() 2342 return self.expression(exp.SecurityProperty, this=security_specifier) 2343 return None 2344 2345 def _parse_settings_property(self) -> exp.SettingsProperty: 2346 return self.expression( 2347 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2348 ) 2349 2350 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2351 if self._index >= 2: 2352 pre_volatile_token = self._tokens[self._index - 2] 2353 else: 2354 pre_volatile_token = None 2355 2356 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2357 return exp.VolatileProperty() 2358 2359 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2360 2361 def _parse_retention_period(self) -> exp.Var: 2362 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2363 number = self._parse_number() 2364 number_str = f"{number} " if number else "" 2365 unit = self._parse_var(any_token=True) 2366 return exp.var(f"{number_str}{unit}") 2367 2368 def _parse_system_versioning_property( 2369 self, with_: bool = False 2370 ) -> exp.WithSystemVersioningProperty: 2371 self._match(TokenType.EQ) 2372 prop = self.expression( 2373 exp.WithSystemVersioningProperty, 2374 **{ # type: ignore 2375 "on": True, 2376 "with": with_, 2377 }, 2378 ) 2379 2380 if self._match_text_seq("OFF"): 2381 prop.set("on", False) 2382 return prop 2383 2384 self._match(TokenType.ON) 2385 if self._match(TokenType.L_PAREN): 2386 while self._curr and not self._match(TokenType.R_PAREN): 2387 if self._match_text_seq("HISTORY_TABLE", "="): 2388 prop.set("this", self._parse_table_parts()) 2389 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2390 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2391 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2392 prop.set("retention_period", self._parse_retention_period()) 2393 2394 self._match(TokenType.COMMA) 2395 2396 return prop 2397 2398 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2399 self._match(TokenType.EQ) 2400 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2401 prop = self.expression(exp.DataDeletionProperty, on=on) 2402 2403 if self._match(TokenType.L_PAREN): 2404 while self._curr and not self._match(TokenType.R_PAREN): 2405 if self._match_text_seq("FILTER_COLUMN", "="): 2406 prop.set("filter_column", self._parse_column()) 2407 elif self._match_text_seq("RETENTION_PERIOD", "="): 2408 prop.set("retention_period", self._parse_retention_period()) 2409 2410 self._match(TokenType.COMMA) 2411 2412 return prop 2413 2414 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2415 kind = "HASH" 2416 expressions: t.Optional[t.List[exp.Expression]] = None 2417 if self._match_text_seq("BY", "HASH"): 2418 expressions = self._parse_wrapped_csv(self._parse_id_var) 2419 elif self._match_text_seq("BY", "RANDOM"): 2420 kind = "RANDOM" 2421 2422 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2423 
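# e.g. the Doris/StarRocks-style clause DISTRIBUTED BY HASH (id) BUCKETS 10 produces
# kind="HASH" with the wrapped identifiers and buckets=10, while BUCKETS AUTO (or
# omitting BUCKETS entirely) leaves `buckets` unset (illustrative reading of this parser).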
buckets: t.Optional[exp.Expression] = None 2424 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2425 buckets = self._parse_number() 2426 2427 return self.expression( 2428 exp.DistributedByProperty, 2429 expressions=expressions, 2430 kind=kind, 2431 buckets=buckets, 2432 order=self._parse_order(), 2433 ) 2434 2435 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2436 self._match_text_seq("KEY") 2437 expressions = self._parse_wrapped_id_vars() 2438 return self.expression(expr_type, expressions=expressions) 2439 2440 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2441 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2442 prop = self._parse_system_versioning_property(with_=True) 2443 self._match_r_paren() 2444 return prop 2445 2446 if self._match(TokenType.L_PAREN, advance=False): 2447 return self._parse_wrapped_properties() 2448 2449 if self._match_text_seq("JOURNAL"): 2450 return self._parse_withjournaltable() 2451 2452 if self._match_texts(self.VIEW_ATTRIBUTES): 2453 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2454 2455 if self._match_text_seq("DATA"): 2456 return self._parse_withdata(no=False) 2457 elif self._match_text_seq("NO", "DATA"): 2458 return self._parse_withdata(no=True) 2459 2460 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2461 return self._parse_serde_properties(with_=True) 2462 2463 if self._match(TokenType.SCHEMA): 2464 return self.expression( 2465 exp.WithSchemaBindingProperty, 2466 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2467 ) 2468 2469 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2470 return self.expression( 2471 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2472 ) 2473 2474 if not self._next: 2475 return None 2476 2477 return self._parse_withisolatedloading() 2478 2479 def _parse_procedure_option(self) -> exp.Expression | None: 2480 if self._match_text_seq("EXECUTE", "AS"): 2481 return self.expression( 2482 exp.ExecuteAsProperty, 2483 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2484 or self._parse_string(), 2485 ) 2486 2487 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2488 2489 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2490 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2491 self._match(TokenType.EQ) 2492 2493 user = self._parse_id_var() 2494 self._match(TokenType.PARAMETER) 2495 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2496 2497 if not user or not host: 2498 return None 2499 2500 return exp.DefinerProperty(this=f"{user}@{host}") 2501 2502 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2503 self._match(TokenType.TABLE) 2504 self._match(TokenType.EQ) 2505 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2506 2507 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2508 return self.expression(exp.LogProperty, no=no) 2509 2510 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2511 return self.expression(exp.JournalProperty, **kwargs) 2512 2513 def _parse_checksum(self) -> exp.ChecksumProperty: 2514 self._match(TokenType.EQ) 2515 2516 on = None 2517 if self._match(TokenType.ON): 2518 on = True 2519 elif self._match_text_seq("OFF"): 2520 on = False 2521 2522 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2523 2524 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2525 return self.expression( 2526 exp.Cluster, 2527 expressions=( 2528 self._parse_wrapped_csv(self._parse_ordered) 2529 if wrapped 2530 else self._parse_csv(self._parse_ordered) 2531 ), 2532 ) 2533 2534 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2535 self._match_text_seq("BY") 2536 2537 self._match_l_paren() 2538 expressions = self._parse_csv(self._parse_column) 2539 self._match_r_paren() 2540 2541 if self._match_text_seq("SORTED", "BY"): 2542 self._match_l_paren() 2543 sorted_by = self._parse_csv(self._parse_ordered) 2544 self._match_r_paren() 2545 else: 2546 sorted_by = None 2547 2548 self._match(TokenType.INTO) 2549 buckets = self._parse_number() 2550 self._match_text_seq("BUCKETS") 2551 2552 return self.expression( 2553 exp.ClusteredByProperty, 2554 expressions=expressions, 2555 sorted_by=sorted_by, 2556 buckets=buckets, 2557 ) 2558 2559 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2560 if not self._match_text_seq("GRANTS"): 2561 self._retreat(self._index - 1) 2562 return None 2563 2564 return self.expression(exp.CopyGrantsProperty) 2565 2566 def _parse_freespace(self) -> exp.FreespaceProperty: 2567 self._match(TokenType.EQ) 2568 return self.expression( 2569 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2570 ) 2571 2572 def _parse_mergeblockratio( 2573 self, no: bool = False, default: bool = False 2574 ) -> exp.MergeBlockRatioProperty: 2575 if self._match(TokenType.EQ): 2576 return self.expression( 2577 exp.MergeBlockRatioProperty, 2578 this=self._parse_number(), 2579 percent=self._match(TokenType.PERCENT), 2580 ) 2581 2582 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2583 2584 def _parse_datablocksize( 2585 self, 2586 default: t.Optional[bool] = None, 2587 minimum: t.Optional[bool] = None, 2588 maximum: t.Optional[bool] = None, 2589 ) -> exp.DataBlocksizeProperty: 2590 self._match(TokenType.EQ) 2591 size = self._parse_number() 2592 2593 units = None 2594 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2595 units = self._prev.text 2596 2597 return self.expression( 2598 exp.DataBlocksizeProperty, 2599 size=size, 2600 units=units, 2601 default=default, 2602 minimum=minimum, 2603 maximum=maximum, 2604 ) 2605 2606 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2607 self._match(TokenType.EQ) 2608 always = self._match_text_seq("ALWAYS") 2609 manual = self._match_text_seq("MANUAL") 2610 never = self._match_text_seq("NEVER") 2611 default = self._match_text_seq("DEFAULT") 2612 2613 autotemp = None 2614 if self._match_text_seq("AUTOTEMP"): 2615 autotemp = self._parse_schema() 2616 2617 return self.expression( 2618 exp.BlockCompressionProperty, 2619 always=always, 2620 manual=manual, 2621 never=never, 2622 default=default, 2623 autotemp=autotemp, 2624 ) 2625 2626 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2627 index = self._index 2628 no = self._match_text_seq("NO") 2629 concurrent = self._match_text_seq("CONCURRENT") 2630 2631 if not self._match_text_seq("ISOLATED", "LOADING"): 2632 self._retreat(index) 2633 return None 2634 2635 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2636 return self.expression( 2637 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2638 ) 2639 2640 def _parse_locking(self) -> exp.LockingProperty: 2641 if self._match(TokenType.TABLE): 2642 kind = "TABLE" 2643 elif 
self._match(TokenType.VIEW): 2644 kind = "VIEW" 2645 elif self._match(TokenType.ROW): 2646 kind = "ROW" 2647 elif self._match_text_seq("DATABASE"): 2648 kind = "DATABASE" 2649 else: 2650 kind = None 2651 2652 if kind in ("DATABASE", "TABLE", "VIEW"): 2653 this = self._parse_table_parts() 2654 else: 2655 this = None 2656 2657 if self._match(TokenType.FOR): 2658 for_or_in = "FOR" 2659 elif self._match(TokenType.IN): 2660 for_or_in = "IN" 2661 else: 2662 for_or_in = None 2663 2664 if self._match_text_seq("ACCESS"): 2665 lock_type = "ACCESS" 2666 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2667 lock_type = "EXCLUSIVE" 2668 elif self._match_text_seq("SHARE"): 2669 lock_type = "SHARE" 2670 elif self._match_text_seq("READ"): 2671 lock_type = "READ" 2672 elif self._match_text_seq("WRITE"): 2673 lock_type = "WRITE" 2674 elif self._match_text_seq("CHECKSUM"): 2675 lock_type = "CHECKSUM" 2676 else: 2677 lock_type = None 2678 2679 override = self._match_text_seq("OVERRIDE") 2680 2681 return self.expression( 2682 exp.LockingProperty, 2683 this=this, 2684 kind=kind, 2685 for_or_in=for_or_in, 2686 lock_type=lock_type, 2687 override=override, 2688 ) 2689 2690 def _parse_partition_by(self) -> t.List[exp.Expression]: 2691 if self._match(TokenType.PARTITION_BY): 2692 return self._parse_csv(self._parse_assignment) 2693 return [] 2694 2695 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2696 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2697 if self._match_text_seq("MINVALUE"): 2698 return exp.var("MINVALUE") 2699 if self._match_text_seq("MAXVALUE"): 2700 return exp.var("MAXVALUE") 2701 return self._parse_bitwise() 2702 2703 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2704 expression = None 2705 from_expressions = None 2706 to_expressions = None 2707 2708 if self._match(TokenType.IN): 2709 this = self._parse_wrapped_csv(self._parse_bitwise) 2710 elif self._match(TokenType.FROM): 2711 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2712 self._match_text_seq("TO") 2713 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2714 elif self._match_text_seq("WITH", "(", "MODULUS"): 2715 this = self._parse_number() 2716 self._match_text_seq(",", "REMAINDER") 2717 expression = self._parse_number() 2718 self._match_r_paren() 2719 else: 2720 self.raise_error("Failed to parse partition bound spec.") 2721 2722 return self.expression( 2723 exp.PartitionBoundSpec, 2724 this=this, 2725 expression=expression, 2726 from_expressions=from_expressions, 2727 to_expressions=to_expressions, 2728 ) 2729 2730 # https://www.postgresql.org/docs/current/sql-createtable.html 2731 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2732 if not self._match_text_seq("OF"): 2733 self._retreat(self._index - 1) 2734 return None 2735 2736 this = self._parse_table(schema=True) 2737 2738 if self._match(TokenType.DEFAULT): 2739 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2740 elif self._match_text_seq("FOR", "VALUES"): 2741 expression = self._parse_partition_bound_spec() 2742 else: 2743 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2744 2745 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2746 2747 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2748 self._match(TokenType.EQ) 2749 return self.expression( 2750 exp.PartitionedByProperty, 2751 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2752 ) 2753 2754 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2755 if self._match_text_seq("AND", "STATISTICS"): 2756 statistics = True 2757 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2758 statistics = False 2759 else: 2760 statistics = None 2761 2762 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2763 2764 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2765 if self._match_text_seq("SQL"): 2766 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2767 return None 2768 2769 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2770 if self._match_text_seq("SQL", "DATA"): 2771 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2772 return None 2773 2774 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2775 if self._match_text_seq("PRIMARY", "INDEX"): 2776 return exp.NoPrimaryIndexProperty() 2777 if self._match_text_seq("SQL"): 2778 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2779 return None 2780 2781 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2782 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2783 return exp.OnCommitProperty() 2784 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2785 return exp.OnCommitProperty(delete=True) 2786 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2787 2788 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2789 if self._match_text_seq("SQL", "DATA"): 2790 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2791 return None 2792 2793 def _parse_distkey(self) -> exp.DistKeyProperty: 2794 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2795 2796 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2797 table = self._parse_table(schema=True) 2798 2799 options = [] 2800 while self._match_texts(("INCLUDING", "EXCLUDING")): 2801 this = self._prev.text.upper() 2802 2803 id_var = self._parse_id_var() 2804 if not id_var: 2805 return None 2806 2807 options.append( 2808 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2809 ) 2810 2811 return self.expression(exp.LikeProperty, this=table, expressions=options) 2812 2813 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2814 return self.expression( 2815 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2816 ) 2817 2818 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2819 self._match(TokenType.EQ) 2820 return self.expression( 2821 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2822 ) 2823 2824 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2825 self._match_text_seq("WITH", "CONNECTION") 2826 return self.expression( 2827 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2828 ) 2829 2830 def _parse_returns(self) -> exp.ReturnsProperty: 2831 value: t.Optional[exp.Expression] 2832 null = None 2833 is_table = self._match(TokenType.TABLE) 2834 2835 if is_table: 2836 if self._match(TokenType.LT): 2837 value = self.expression( 2838 exp.Schema, 2839 this="TABLE", 2840 expressions=self._parse_csv(self._parse_struct_types), 2841 ) 2842 if not self._match(TokenType.GT): 2843 self.raise_error("Expecting >") 2844 else: 2845 value = self._parse_schema(exp.var("TABLE")) 2846 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
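# This branch consumes the SQL-standard `RETURNS NULL ON NULL INPUT` function
# characteristic (the RETURNS keyword itself was matched upstream by the property dispatch).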
2847 null = True 2848 value = None 2849 else: 2850 value = self._parse_types() 2851 2852 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2853 2854 def _parse_describe(self) -> exp.Describe: 2855 kind = self._match_set(self.CREATABLES) and self._prev.text 2856 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2857 if self._match(TokenType.DOT): 2858 style = None 2859 self._retreat(self._index - 2) 2860 2861 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2862 2863 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2864 this = self._parse_statement() 2865 else: 2866 this = self._parse_table(schema=True) 2867 2868 properties = self._parse_properties() 2869 expressions = properties.expressions if properties else None 2870 partition = self._parse_partition() 2871 return self.expression( 2872 exp.Describe, 2873 this=this, 2874 style=style, 2875 kind=kind, 2876 expressions=expressions, 2877 partition=partition, 2878 format=format, 2879 ) 2880 2881 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2882 kind = self._prev.text.upper() 2883 expressions = [] 2884 2885 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2886 if self._match(TokenType.WHEN): 2887 expression = self._parse_disjunction() 2888 self._match(TokenType.THEN) 2889 else: 2890 expression = None 2891 2892 else_ = self._match(TokenType.ELSE) 2893 2894 if not self._match(TokenType.INTO): 2895 return None 2896 2897 return self.expression( 2898 exp.ConditionalInsert, 2899 this=self.expression( 2900 exp.Insert, 2901 this=self._parse_table(schema=True), 2902 expression=self._parse_derived_table_values(), 2903 ), 2904 expression=expression, 2905 else_=else_, 2906 ) 2907 2908 expression = parse_conditional_insert() 2909 while expression is not None: 2910 expressions.append(expression) 2911 expression = parse_conditional_insert() 2912 2913 return self.expression( 2914 exp.MultitableInserts, 2915 kind=kind, 2916 comments=comments, 2917 expressions=expressions, 2918 source=self._parse_table(), 2919 ) 2920 2921 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2922 comments = [] 2923 hint = self._parse_hint() 2924 overwrite = self._match(TokenType.OVERWRITE) 2925 ignore = self._match(TokenType.IGNORE) 2926 local = self._match_text_seq("LOCAL") 2927 alternative = None 2928 is_function = None 2929 2930 if self._match_text_seq("DIRECTORY"): 2931 this: t.Optional[exp.Expression] = self.expression( 2932 exp.Directory, 2933 this=self._parse_var_or_string(), 2934 local=local, 2935 row_format=self._parse_row_format(match_row=True), 2936 ) 2937 else: 2938 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2939 comments += ensure_list(self._prev_comments) 2940 return self._parse_multitable_inserts(comments) 2941 2942 if self._match(TokenType.OR): 2943 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2944 2945 self._match(TokenType.INTO) 2946 comments += ensure_list(self._prev_comments) 2947 self._match(TokenType.TABLE) 2948 is_function = self._match(TokenType.FUNCTION) 2949 2950 this = ( 2951 self._parse_table(schema=True, parse_partition=True) 2952 if not is_function 2953 else self._parse_function() 2954 ) 2955 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2956 this.set("alias", self._parse_table_alias()) 2957 2958 returning = self._parse_returning() 2959 2960 return self.expression( 2961 
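# Each clause matched below is optional: the _match* helpers return a falsy value
# when their keyword is absent, so unmatched clauses simply leave the corresponding
# exp.Insert argument unset.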
exp.Insert, 2962 comments=comments, 2963 hint=hint, 2964 is_function=is_function, 2965 this=this, 2966 stored=self._match_text_seq("STORED") and self._parse_stored(), 2967 by_name=self._match_text_seq("BY", "NAME"), 2968 exists=self._parse_exists(), 2969 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2970 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2971 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2972 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2973 conflict=self._parse_on_conflict(), 2974 returning=returning or self._parse_returning(), 2975 overwrite=overwrite, 2976 alternative=alternative, 2977 ignore=ignore, 2978 source=self._match(TokenType.TABLE) and self._parse_table(), 2979 ) 2980 2981 def _parse_kill(self) -> exp.Kill: 2982 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2983 2984 return self.expression( 2985 exp.Kill, 2986 this=self._parse_primary(), 2987 kind=kind, 2988 ) 2989 2990 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2991 conflict = self._match_text_seq("ON", "CONFLICT") 2992 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2993 2994 if not conflict and not duplicate: 2995 return None 2996 2997 conflict_keys = None 2998 constraint = None 2999 3000 if conflict: 3001 if self._match_text_seq("ON", "CONSTRAINT"): 3002 constraint = self._parse_id_var() 3003 elif self._match(TokenType.L_PAREN): 3004 conflict_keys = self._parse_csv(self._parse_id_var) 3005 self._match_r_paren() 3006 3007 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3008 if self._prev.token_type == TokenType.UPDATE: 3009 self._match(TokenType.SET) 3010 expressions = self._parse_csv(self._parse_equality) 3011 else: 3012 expressions = None 3013 3014 return self.expression( 3015 exp.OnConflict, 3016 duplicate=duplicate, 3017 expressions=expressions, 3018 action=action, 3019 conflict_keys=conflict_keys, 3020 constraint=constraint, 3021 where=self._parse_where(), 3022 ) 3023 3024 def _parse_returning(self) -> t.Optional[exp.Returning]: 3025 if not self._match(TokenType.RETURNING): 3026 return None 3027 return self.expression( 3028 exp.Returning, 3029 expressions=self._parse_csv(self._parse_expression), 3030 into=self._match(TokenType.INTO) and self._parse_table_part(), 3031 ) 3032 3033 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3034 if not self._match(TokenType.FORMAT): 3035 return None 3036 return self._parse_row_format() 3037 3038 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3039 index = self._index 3040 with_ = with_ or self._match_text_seq("WITH") 3041 3042 if not self._match(TokenType.SERDE_PROPERTIES): 3043 self._retreat(index) 3044 return None 3045 return self.expression( 3046 exp.SerdeProperties, 3047 **{ # type: ignore 3048 "expressions": self._parse_wrapped_properties(), 3049 "with": with_, 3050 }, 3051 ) 3052 3053 def _parse_row_format( 3054 self, match_row: bool = False 3055 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3056 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3057 return None 3058 3059 if self._match_text_seq("SERDE"): 3060 this = self._parse_string() 3061 3062 serde_properties = self._parse_serde_properties() 3063 3064 return self.expression( 3065 exp.RowFormatSerdeProperty, this=this, 
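# `serde_properties` carries Hive's optional WITH SERDEPROPERTIES (...) list parsed
# above and is None when that clause is absent.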
serde_properties=serde_properties 3066 ) 3067 3068 self._match_text_seq("DELIMITED") 3069 3070 kwargs = {} 3071 3072 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3073 kwargs["fields"] = self._parse_string() 3074 if self._match_text_seq("ESCAPED", "BY"): 3075 kwargs["escaped"] = self._parse_string() 3076 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3077 kwargs["collection_items"] = self._parse_string() 3078 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3079 kwargs["map_keys"] = self._parse_string() 3080 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3081 kwargs["lines"] = self._parse_string() 3082 if self._match_text_seq("NULL", "DEFINED", "AS"): 3083 kwargs["null"] = self._parse_string() 3084 3085 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3086 3087 def _parse_load(self) -> exp.LoadData | exp.Command: 3088 if self._match_text_seq("DATA"): 3089 local = self._match_text_seq("LOCAL") 3090 self._match_text_seq("INPATH") 3091 inpath = self._parse_string() 3092 overwrite = self._match(TokenType.OVERWRITE) 3093 self._match_pair(TokenType.INTO, TokenType.TABLE) 3094 3095 return self.expression( 3096 exp.LoadData, 3097 this=self._parse_table(schema=True), 3098 local=local, 3099 overwrite=overwrite, 3100 inpath=inpath, 3101 partition=self._parse_partition(), 3102 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3103 serde=self._match_text_seq("SERDE") and self._parse_string(), 3104 ) 3105 return self._parse_as_command(self._prev) 3106 3107 def _parse_delete(self) -> exp.Delete: 3108 # This handles MySQL's "Multiple-Table Syntax" 3109 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3110 tables = None 3111 if not self._match(TokenType.FROM, advance=False): 3112 tables = self._parse_csv(self._parse_table) or None 3113 3114 returning = self._parse_returning() 3115 3116 return self.expression( 3117 exp.Delete, 3118 tables=tables, 3119 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3120 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3121 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3122 where=self._parse_where(), 3123 returning=returning or self._parse_returning(), 3124 limit=self._parse_limit(), 3125 ) 3126 3127 def _parse_update(self) -> exp.Update: 3128 kwargs: t.Dict[str, t.Any] = { 3129 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3130 } 3131 while self._curr: 3132 if self._match(TokenType.SET): 3133 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3134 elif self._match(TokenType.RETURNING, advance=False): 3135 kwargs["returning"] = self._parse_returning() 3136 elif self._match(TokenType.FROM, advance=False): 3137 kwargs["from"] = self._parse_from(joins=True) 3138 elif self._match(TokenType.WHERE, advance=False): 3139 kwargs["where"] = self._parse_where() 3140 elif self._match(TokenType.ORDER_BY, advance=False): 3141 kwargs["order"] = self._parse_order() 3142 elif self._match(TokenType.LIMIT, advance=False): 3143 kwargs["limit"] = self._parse_limit() 3144 else: 3145 break 3146 3147 return self.expression(exp.Update, **kwargs) 3148 3149 def _parse_use(self) -> exp.Use: 3150 return self.expression( 3151 exp.Use, 3152 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3153 this=self._parse_table(schema=False), 3154 ) 3155 3156 def _parse_uncache(self) -> exp.Uncache: 3157 if not self._match(TokenType.TABLE): 3158 self.raise_error("Expecting TABLE after 
UNCACHE") 3159 3160 return self.expression( 3161 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3162 ) 3163 3164 def _parse_cache(self) -> exp.Cache: 3165 lazy = self._match_text_seq("LAZY") 3166 self._match(TokenType.TABLE) 3167 table = self._parse_table(schema=True) 3168 3169 options = [] 3170 if self._match_text_seq("OPTIONS"): 3171 self._match_l_paren() 3172 k = self._parse_string() 3173 self._match(TokenType.EQ) 3174 v = self._parse_string() 3175 options = [k, v] 3176 self._match_r_paren() 3177 3178 self._match(TokenType.ALIAS) 3179 return self.expression( 3180 exp.Cache, 3181 this=table, 3182 lazy=lazy, 3183 options=options, 3184 expression=self._parse_select(nested=True), 3185 ) 3186 3187 def _parse_partition(self) -> t.Optional[exp.Partition]: 3188 if not self._match_texts(self.PARTITION_KEYWORDS): 3189 return None 3190 3191 return self.expression( 3192 exp.Partition, 3193 subpartition=self._prev.text.upper() == "SUBPARTITION", 3194 expressions=self._parse_wrapped_csv(self._parse_assignment), 3195 ) 3196 3197 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3198 def _parse_value_expression() -> t.Optional[exp.Expression]: 3199 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3200 return exp.var(self._prev.text.upper()) 3201 return self._parse_expression() 3202 3203 if self._match(TokenType.L_PAREN): 3204 expressions = self._parse_csv(_parse_value_expression) 3205 self._match_r_paren() 3206 return self.expression(exp.Tuple, expressions=expressions) 3207 3208 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3209 expression = self._parse_expression() 3210 if expression: 3211 return self.expression(exp.Tuple, expressions=[expression]) 3212 return None 3213 3214 def _parse_projections(self) -> t.List[exp.Expression]: 3215 return self._parse_expressions() 3216 3217 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3218 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3219 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3220 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3221 ) 3222 elif self._match(TokenType.FROM): 3223 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3224 # Support parentheses for duckdb FROM-first syntax 3225 select = self._parse_select(from_=from_) 3226 if select: 3227 if not select.args.get("from"): 3228 select.set("from", from_) 3229 this = select 3230 else: 3231 this = exp.select("*").from_(t.cast(exp.From, from_)) 3232 else: 3233 this = ( 3234 self._parse_table(consume_pipe=True) 3235 if table 3236 else self._parse_select(nested=True, parse_set_operation=False) 3237 ) 3238 3239 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3240 # in case a modifier (e.g. 
join) is following 3241 if table and isinstance(this, exp.Values) and this.alias: 3242 alias = this.args["alias"].pop() 3243 this = exp.Table(this=this, alias=alias) 3244 3245 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3246 3247 return this 3248 3249 def _parse_select( 3250 self, 3251 nested: bool = False, 3252 table: bool = False, 3253 parse_subquery_alias: bool = True, 3254 parse_set_operation: bool = True, 3255 consume_pipe: bool = True, 3256 from_: t.Optional[exp.From] = None, 3257 ) -> t.Optional[exp.Expression]: 3258 query = self._parse_select_query( 3259 nested=nested, 3260 table=table, 3261 parse_subquery_alias=parse_subquery_alias, 3262 parse_set_operation=parse_set_operation, 3263 ) 3264 3265 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3266 if not query and from_: 3267 query = exp.select("*").from_(from_) 3268 if isinstance(query, exp.Query): 3269 query = self._parse_pipe_syntax_query(query) 3270 query = query.subquery(copy=False) if query and table else query 3271 3272 return query 3273 3274 def _parse_select_query( 3275 self, 3276 nested: bool = False, 3277 table: bool = False, 3278 parse_subquery_alias: bool = True, 3279 parse_set_operation: bool = True, 3280 ) -> t.Optional[exp.Expression]: 3281 cte = self._parse_with() 3282 3283 if cte: 3284 this = self._parse_statement() 3285 3286 if not this: 3287 self.raise_error("Failed to parse any statement following CTE") 3288 return cte 3289 3290 if "with" in this.arg_types: 3291 this.set("with", cte) 3292 else: 3293 self.raise_error(f"{this.key} does not support CTE") 3294 this = cte 3295 3296 return this 3297 3298 # duckdb supports leading with FROM x 3299 from_ = ( 3300 self._parse_from(consume_pipe=True) 3301 if self._match(TokenType.FROM, advance=False) 3302 else None 3303 ) 3304 3305 if self._match(TokenType.SELECT): 3306 comments = self._prev_comments 3307 3308 hint = self._parse_hint() 3309 3310 if self._next and not self._next.token_type == TokenType.DOT: 3311 all_ = self._match(TokenType.ALL) 3312 distinct = self._match_set(self.DISTINCT_TOKENS) 3313 else: 3314 all_, distinct = None, None 3315 3316 kind = ( 3317 self._match(TokenType.ALIAS) 3318 and self._match_texts(("STRUCT", "VALUE")) 3319 and self._prev.text.upper() 3320 ) 3321 3322 if distinct: 3323 distinct = self.expression( 3324 exp.Distinct, 3325 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3326 ) 3327 3328 if all_ and distinct: 3329 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3330 3331 operation_modifiers = [] 3332 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3333 operation_modifiers.append(exp.var(self._prev.text.upper())) 3334 3335 limit = self._parse_limit(top=True) 3336 projections = self._parse_projections() 3337 3338 this = self.expression( 3339 exp.Select, 3340 kind=kind, 3341 hint=hint, 3342 distinct=distinct, 3343 expressions=projections, 3344 limit=limit, 3345 operation_modifiers=operation_modifiers or None, 3346 ) 3347 this.comments = comments 3348 3349 into = self._parse_into() 3350 if into: 3351 this.set("into", into) 3352 3353 if not from_: 3354 from_ = self._parse_from() 3355 3356 if from_: 3357 this.set("from", from_) 3358 3359 this = self._parse_query_modifiers(this) 3360 elif (table or nested) and self._match(TokenType.L_PAREN): 3361 this = self._parse_wrapped_select(table=table) 3362 3363 # We return early here so that the UNION isn't attached to the subquery by the 3364 # following call to _parse_set_operations, but 
instead becomes the parent node 3365 self._match_r_paren() 3366 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3367 elif self._match(TokenType.VALUES, advance=False): 3368 this = self._parse_derived_table_values() 3369 elif from_: 3370 this = exp.select("*").from_(from_.this, copy=False) 3371 elif self._match(TokenType.SUMMARIZE): 3372 table = self._match(TokenType.TABLE) 3373 this = self._parse_select() or self._parse_string() or self._parse_table() 3374 return self.expression(exp.Summarize, this=this, table=table) 3375 elif self._match(TokenType.DESCRIBE): 3376 this = self._parse_describe() 3377 elif self._match_text_seq("STREAM"): 3378 this = self._parse_function() 3379 if this: 3380 this = self.expression(exp.Stream, this=this) 3381 else: 3382 self._retreat(self._index - 1) 3383 else: 3384 this = None 3385 3386 return self._parse_set_operations(this) if parse_set_operation else this 3387 3388 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3389 self._match_text_seq("SEARCH") 3390 3391 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3392 3393 if not kind: 3394 return None 3395 3396 self._match_text_seq("FIRST", "BY") 3397 3398 return self.expression( 3399 exp.RecursiveWithSearch, 3400 kind=kind, 3401 this=self._parse_id_var(), 3402 expression=self._match_text_seq("SET") and self._parse_id_var(), 3403 using=self._match_text_seq("USING") and self._parse_id_var(), 3404 ) 3405 3406 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3407 if not skip_with_token and not self._match(TokenType.WITH): 3408 return None 3409 3410 comments = self._prev_comments 3411 recursive = self._match(TokenType.RECURSIVE) 3412 3413 last_comments = None 3414 expressions = [] 3415 while True: 3416 cte = self._parse_cte() 3417 if isinstance(cte, exp.CTE): 3418 expressions.append(cte) 3419 if last_comments: 3420 cte.add_comments(last_comments) 3421 3422 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3423 break 3424 else: 3425 self._match(TokenType.WITH) 3426 3427 last_comments = self._prev_comments 3428 3429 return self.expression( 3430 exp.With, 3431 comments=comments, 3432 expressions=expressions, 3433 recursive=recursive, 3434 search=self._parse_recursive_with_search(), 3435 ) 3436 3437 def _parse_cte(self) -> t.Optional[exp.CTE]: 3438 index = self._index 3439 3440 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3441 if not alias or not alias.this: 3442 self.raise_error("Expected CTE to have alias") 3443 3444 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3445 self._retreat(index) 3446 return None 3447 3448 comments = self._prev_comments 3449 3450 if self._match_text_seq("NOT", "MATERIALIZED"): 3451 materialized = False 3452 elif self._match_text_seq("MATERIALIZED"): 3453 materialized = True 3454 else: 3455 materialized = None 3456 3457 cte = self.expression( 3458 exp.CTE, 3459 this=self._parse_wrapped(self._parse_statement), 3460 alias=alias, 3461 materialized=materialized, 3462 comments=comments, 3463 ) 3464 3465 values = cte.this 3466 if isinstance(values, exp.Values): 3467 if values.alias: 3468 cte.set("this", exp.select("*").from_(values)) 3469 else: 3470 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3471 3472 return cte 3473 3474 def _parse_table_alias( 3475 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3476 ) -> t.Optional[exp.TableAlias]: 3477 # In some dialects, LIMIT and OFFSET 
can act as both identifiers and keywords (clauses) 3478 # so this section tries to parse the clause version and if it fails, it treats the token 3479 # as an identifier (alias) 3480 if self._can_parse_limit_or_offset(): 3481 return None 3482 3483 any_token = self._match(TokenType.ALIAS) 3484 alias = ( 3485 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3486 or self._parse_string_as_identifier() 3487 ) 3488 3489 index = self._index 3490 if self._match(TokenType.L_PAREN): 3491 columns = self._parse_csv(self._parse_function_parameter) 3492 self._match_r_paren() if columns else self._retreat(index) 3493 else: 3494 columns = None 3495 3496 if not alias and not columns: 3497 return None 3498 3499 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3500 3501 # We bubble up comments from the Identifier to the TableAlias 3502 if isinstance(alias, exp.Identifier): 3503 table_alias.add_comments(alias.pop_comments()) 3504 3505 return table_alias 3506 3507 def _parse_subquery( 3508 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3509 ) -> t.Optional[exp.Subquery]: 3510 if not this: 3511 return None 3512 3513 return self.expression( 3514 exp.Subquery, 3515 this=this, 3516 pivots=self._parse_pivots(), 3517 alias=self._parse_table_alias() if parse_alias else None, 3518 sample=self._parse_table_sample(), 3519 ) 3520 3521 def _implicit_unnests_to_explicit(self, this: E) -> E: 3522 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3523 3524 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3525 for i, join in enumerate(this.args.get("joins") or []): 3526 table = join.this 3527 normalized_table = table.copy() 3528 normalized_table.meta["maybe_column"] = True 3529 normalized_table = _norm(normalized_table, dialect=self.dialect) 3530 3531 if isinstance(table, exp.Table) and not join.args.get("on"): 3532 if normalized_table.parts[0].name in refs: 3533 table_as_column = table.to_column() 3534 unnest = exp.Unnest(expressions=[table_as_column]) 3535 3536 # Table.to_column creates a parent Alias node that we want to convert to 3537 # a TableAlias and attach to the Unnest, so it matches the parser's output 3538 if isinstance(table.args.get("alias"), exp.TableAlias): 3539 table_as_column.replace(table_as_column.this) 3540 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3541 3542 table.replace(unnest) 3543 3544 refs.add(normalized_table.alias_or_name) 3545 3546 return this 3547 3548 def _parse_query_modifiers( 3549 self, this: t.Optional[exp.Expression] 3550 ) -> t.Optional[exp.Expression]: 3551 if isinstance(this, self.MODIFIABLES): 3552 for join in self._parse_joins(): 3553 this.append("joins", join) 3554 for lateral in iter(self._parse_lateral, None): 3555 this.append("laterals", lateral) 3556 3557 while True: 3558 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3559 modifier_token = self._curr 3560 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3561 key, expression = parser(self) 3562 3563 if expression: 3564 if this.args.get(key): 3565 self.raise_error( 3566 f"Found multiple '{modifier_token.text.upper()}' clauses", 3567 token=modifier_token, 3568 ) 3569 3570 this.set(key, expression) 3571 if key == "limit": 3572 offset = expression.args.pop("offset", None) 3573 3574 if offset: 3575 offset = exp.Offset(expression=offset) 3576 this.set("offset", offset) 3577 3578 limit_by_expressions = expression.expressions 3579 
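# Illustrative sketch (editorial addition, not from the upstream source): the two
# statements that follow detach the LIMIT's embedded pieces, so the offset popped
# above becomes a standalone exp.Offset on the query, and any LIMIT ... BY
# expressions (ClickHouse) move onto that Offset node. Roughly, via the public API:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT x FROM t LIMIT 5, 10", read="mysql").sql()
#   'SELECT x FROM t LIMIT 10 OFFSET 5'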
expression.set("expressions", None) 3580 offset.set("expressions", limit_by_expressions) 3581 continue 3582 break 3583 3584 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3585 this = self._implicit_unnests_to_explicit(this) 3586 3587 return this 3588 3589 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3590 start = self._curr 3591 while self._curr: 3592 self._advance() 3593 3594 end = self._tokens[self._index - 1] 3595 return exp.Hint(expressions=[self._find_sql(start, end)]) 3596 3597 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3598 return self._parse_function_call() 3599 3600 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3601 start_index = self._index 3602 should_fallback_to_string = False 3603 3604 hints = [] 3605 try: 3606 for hint in iter( 3607 lambda: self._parse_csv( 3608 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3609 ), 3610 [], 3611 ): 3612 hints.extend(hint) 3613 except ParseError: 3614 should_fallback_to_string = True 3615 3616 if should_fallback_to_string or self._curr: 3617 self._retreat(start_index) 3618 return self._parse_hint_fallback_to_string() 3619 3620 return self.expression(exp.Hint, expressions=hints) 3621 3622 def _parse_hint(self) -> t.Optional[exp.Hint]: 3623 if self._match(TokenType.HINT) and self._prev_comments: 3624 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3625 3626 return None 3627 3628 def _parse_into(self) -> t.Optional[exp.Into]: 3629 if not self._match(TokenType.INTO): 3630 return None 3631 3632 temp = self._match(TokenType.TEMPORARY) 3633 unlogged = self._match_text_seq("UNLOGGED") 3634 self._match(TokenType.TABLE) 3635 3636 return self.expression( 3637 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3638 ) 3639 3640 def _parse_from( 3641 self, 3642 joins: bool = False, 3643 skip_from_token: bool = False, 3644 consume_pipe: bool = False, 3645 ) -> t.Optional[exp.From]: 3646 if not skip_from_token and not self._match(TokenType.FROM): 3647 return None 3648 3649 return self.expression( 3650 exp.From, 3651 comments=self._prev_comments, 3652 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3653 ) 3654 3655 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3656 return self.expression( 3657 exp.MatchRecognizeMeasure, 3658 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3659 this=self._parse_expression(), 3660 ) 3661 3662 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3663 if not self._match(TokenType.MATCH_RECOGNIZE): 3664 return None 3665 3666 self._match_l_paren() 3667 3668 partition = self._parse_partition_by() 3669 order = self._parse_order() 3670 3671 measures = ( 3672 self._parse_csv(self._parse_match_recognize_measure) 3673 if self._match_text_seq("MEASURES") 3674 else None 3675 ) 3676 3677 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3678 rows = exp.var("ONE ROW PER MATCH") 3679 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3680 text = "ALL ROWS PER MATCH" 3681 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3682 text += " SHOW EMPTY MATCHES" 3683 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3684 text += " OMIT EMPTY MATCHES" 3685 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3686 text += " WITH UNMATCHED ROWS" 3687 rows = exp.var(text) 3688 else: 3689 rows = None 3690 3691 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3692 text = "AFTER 
MATCH SKIP" 3693 if self._match_text_seq("PAST", "LAST", "ROW"): 3694 text += " PAST LAST ROW" 3695 elif self._match_text_seq("TO", "NEXT", "ROW"): 3696 text += " TO NEXT ROW" 3697 elif self._match_text_seq("TO", "FIRST"): 3698 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3699 elif self._match_text_seq("TO", "LAST"): 3700 text += f" TO LAST {self._advance_any().text}" # type: ignore 3701 after = exp.var(text) 3702 else: 3703 after = None 3704 3705 if self._match_text_seq("PATTERN"): 3706 self._match_l_paren() 3707 3708 if not self._curr: 3709 self.raise_error("Expecting )", self._curr) 3710 3711 paren = 1 3712 start = self._curr 3713 3714 while self._curr and paren > 0: 3715 if self._curr.token_type == TokenType.L_PAREN: 3716 paren += 1 3717 if self._curr.token_type == TokenType.R_PAREN: 3718 paren -= 1 3719 3720 end = self._prev 3721 self._advance() 3722 3723 if paren > 0: 3724 self.raise_error("Expecting )", self._curr) 3725 3726 pattern = exp.var(self._find_sql(start, end)) 3727 else: 3728 pattern = None 3729 3730 define = ( 3731 self._parse_csv(self._parse_name_as_expression) 3732 if self._match_text_seq("DEFINE") 3733 else None 3734 ) 3735 3736 self._match_r_paren() 3737 3738 return self.expression( 3739 exp.MatchRecognize, 3740 partition_by=partition, 3741 order=order, 3742 measures=measures, 3743 rows=rows, 3744 after=after, 3745 pattern=pattern, 3746 define=define, 3747 alias=self._parse_table_alias(), 3748 ) 3749 3750 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3751 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3752 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3753 cross_apply = False 3754 3755 if cross_apply is not None: 3756 this = self._parse_select(table=True) 3757 view = None 3758 outer = None 3759 elif self._match(TokenType.LATERAL): 3760 this = self._parse_select(table=True) 3761 view = self._match(TokenType.VIEW) 3762 outer = self._match(TokenType.OUTER) 3763 else: 3764 return None 3765 3766 if not this: 3767 this = ( 3768 self._parse_unnest() 3769 or self._parse_function() 3770 or self._parse_id_var(any_token=False) 3771 ) 3772 3773 while self._match(TokenType.DOT): 3774 this = exp.Dot( 3775 this=this, 3776 expression=self._parse_function() or self._parse_id_var(any_token=False), 3777 ) 3778 3779 ordinality: t.Optional[bool] = None 3780 3781 if view: 3782 table = self._parse_id_var(any_token=False) 3783 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3784 table_alias: t.Optional[exp.TableAlias] = self.expression( 3785 exp.TableAlias, this=table, columns=columns 3786 ) 3787 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3788 # We move the alias from the lateral's child node to the lateral itself 3789 table_alias = this.args["alias"].pop() 3790 else: 3791 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3792 table_alias = self._parse_table_alias() 3793 3794 return self.expression( 3795 exp.Lateral, 3796 this=this, 3797 view=view, 3798 outer=outer, 3799 alias=table_alias, 3800 cross_apply=cross_apply, 3801 ordinality=ordinality, 3802 ) 3803 3804 def _parse_join_parts( 3805 self, 3806 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3807 return ( 3808 self._match_set(self.JOIN_METHODS) and self._prev, 3809 self._match_set(self.JOIN_SIDES) and self._prev, 3810 self._match_set(self.JOIN_KINDS) and self._prev, 3811 ) 3812 3813 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3814 def 
_parse_column_as_identifier() -> t.Optional[exp.Expression]: 3815 this = self._parse_column() 3816 if isinstance(this, exp.Column): 3817 return this.this 3818 return this 3819 3820 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3821 3822 def _parse_join( 3823 self, skip_join_token: bool = False, parse_bracket: bool = False 3824 ) -> t.Optional[exp.Join]: 3825 if self._match(TokenType.COMMA): 3826 table = self._try_parse(self._parse_table) 3827 cross_join = self.expression(exp.Join, this=table) if table else None 3828 3829 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3830 cross_join.set("kind", "CROSS") 3831 3832 return cross_join 3833 3834 index = self._index 3835 method, side, kind = self._parse_join_parts() 3836 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3837 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3838 join_comments = self._prev_comments 3839 3840 if not skip_join_token and not join: 3841 self._retreat(index) 3842 kind = None 3843 method = None 3844 side = None 3845 3846 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3847 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3848 3849 if not skip_join_token and not join and not outer_apply and not cross_apply: 3850 return None 3851 3852 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3853 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3854 kwargs["expressions"] = self._parse_csv( 3855 lambda: self._parse_table(parse_bracket=parse_bracket) 3856 ) 3857 3858 if method: 3859 kwargs["method"] = method.text 3860 if side: 3861 kwargs["side"] = side.text 3862 if kind: 3863 kwargs["kind"] = kind.text 3864 if hint: 3865 kwargs["hint"] = hint 3866 3867 if self._match(TokenType.MATCH_CONDITION): 3868 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3869 3870 if self._match(TokenType.ON): 3871 kwargs["on"] = self._parse_assignment() 3872 elif self._match(TokenType.USING): 3873 kwargs["using"] = self._parse_using_identifiers() 3874 elif ( 3875 not method 3876 and not (outer_apply or cross_apply) 3877 and not isinstance(kwargs["this"], exp.Unnest) 3878 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3879 ): 3880 index = self._index 3881 joins: t.Optional[list] = list(self._parse_joins()) 3882 3883 if joins and self._match(TokenType.ON): 3884 kwargs["on"] = self._parse_assignment() 3885 elif joins and self._match(TokenType.USING): 3886 kwargs["using"] = self._parse_using_identifiers() 3887 else: 3888 joins = None 3889 self._retreat(index) 3890 3891 kwargs["this"].set("joins", joins if joins else None) 3892 3893 kwargs["pivots"] = self._parse_pivots() 3894 3895 comments = [c for token in (method, side, kind) if token for c in token.comments] 3896 comments = (join_comments or []) + comments 3897 3898 if ( 3899 self.ADD_JOIN_ON_TRUE 3900 and not kwargs.get("on") 3901 and not kwargs.get("using") 3902 and not kwargs.get("method") 3903 and kwargs.get("kind") in (None, "INNER", "OUTER") 3904 ): 3905 kwargs["on"] = exp.true() 3906 3907 return self.expression(exp.Join, comments=comments, **kwargs) 3908 3909 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3910 this = self._parse_assignment() 3911 3912 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3913 return this 3914 3915 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3916 return 
self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3917 3918 return this 3919 3920 def _parse_index_params(self) -> exp.IndexParameters: 3921 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3922 3923 if self._match(TokenType.L_PAREN, advance=False): 3924 columns = self._parse_wrapped_csv(self._parse_with_operator) 3925 else: 3926 columns = None 3927 3928 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3929 partition_by = self._parse_partition_by() 3930 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3931 tablespace = ( 3932 self._parse_var(any_token=True) 3933 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3934 else None 3935 ) 3936 where = self._parse_where() 3937 3938 on = self._parse_field() if self._match(TokenType.ON) else None 3939 3940 return self.expression( 3941 exp.IndexParameters, 3942 using=using, 3943 columns=columns, 3944 include=include, 3945 partition_by=partition_by, 3946 where=where, 3947 with_storage=with_storage, 3948 tablespace=tablespace, 3949 on=on, 3950 ) 3951 3952 def _parse_index( 3953 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3954 ) -> t.Optional[exp.Index]: 3955 if index or anonymous: 3956 unique = None 3957 primary = None 3958 amp = None 3959 3960 self._match(TokenType.ON) 3961 self._match(TokenType.TABLE) # hive 3962 table = self._parse_table_parts(schema=True) 3963 else: 3964 unique = self._match(TokenType.UNIQUE) 3965 primary = self._match_text_seq("PRIMARY") 3966 amp = self._match_text_seq("AMP") 3967 3968 if not self._match(TokenType.INDEX): 3969 return None 3970 3971 index = self._parse_id_var() 3972 table = None 3973 3974 params = self._parse_index_params() 3975 3976 return self.expression( 3977 exp.Index, 3978 this=index, 3979 table=table, 3980 unique=unique, 3981 primary=primary, 3982 amp=amp, 3983 params=params, 3984 ) 3985 3986 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3987 hints: t.List[exp.Expression] = [] 3988 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3989 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3990 hints.append( 3991 self.expression( 3992 exp.WithTableHint, 3993 expressions=self._parse_csv( 3994 lambda: self._parse_function() or self._parse_var(any_token=True) 3995 ), 3996 ) 3997 ) 3998 self._match_r_paren() 3999 else: 4000 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 4001 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 4002 hint = exp.IndexTableHint(this=self._prev.text.upper()) 4003 4004 self._match_set((TokenType.INDEX, TokenType.KEY)) 4005 if self._match(TokenType.FOR): 4006 hint.set("target", self._advance_any() and self._prev.text.upper()) 4007 4008 hint.set("expressions", self._parse_wrapped_id_vars()) 4009 hints.append(hint) 4010 4011 return hints or None 4012 4013 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4014 return ( 4015 (not schema and self._parse_function(optional_parens=False)) 4016 or self._parse_id_var(any_token=False) 4017 or self._parse_string_as_identifier() 4018 or self._parse_placeholder() 4019 ) 4020 4021 def _parse_table_parts( 4022 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4023 ) -> exp.Table: 4024 catalog = None 4025 db = None 4026 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4027 4028 while self._match(TokenType.DOT): 4029 
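# Illustrative sketch (editorial addition, not from the upstream source): each
# DOT shifts the parts one slot left, so for "a.b.c" the loop leaves catalog=a,
# db=b and the last part as the table name, while deeper paths nest into exp.Dot:
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> tbl = sqlglot.parse_one("SELECT * FROM a.b.c").find(exp.Table)
#   >>> (tbl.catalog, tbl.db, tbl.name)
#   ('a', 'b', 'c')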
if catalog: 4030 # This allows nesting the table in arbitrarily many dot expressions if needed 4031 table = self.expression( 4032 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4033 ) 4034 else: 4035 catalog = db 4036 db = table 4037 # "" used for tsql FROM a..b case 4038 table = self._parse_table_part(schema=schema) or "" 4039 4040 if ( 4041 wildcard 4042 and self._is_connected() 4043 and (isinstance(table, exp.Identifier) or not table) 4044 and self._match(TokenType.STAR) 4045 ): 4046 if isinstance(table, exp.Identifier): 4047 table.args["this"] += "*" 4048 else: 4049 table = exp.Identifier(this="*") 4050 4051 # We bubble up comments from the Identifier to the Table 4052 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4053 4054 if is_db_reference: 4055 catalog = db 4056 db = table 4057 table = None 4058 4059 if not table and not is_db_reference: 4060 self.raise_error(f"Expected table name but got {self._curr}") 4061 if not db and is_db_reference: 4062 self.raise_error(f"Expected database name but got {self._curr}") 4063 4064 table = self.expression( 4065 exp.Table, 4066 comments=comments, 4067 this=table, 4068 db=db, 4069 catalog=catalog, 4070 ) 4071 4072 changes = self._parse_changes() 4073 if changes: 4074 table.set("changes", changes) 4075 4076 at_before = self._parse_historical_data() 4077 if at_before: 4078 table.set("when", at_before) 4079 4080 pivots = self._parse_pivots() 4081 if pivots: 4082 table.set("pivots", pivots) 4083 4084 return table 4085 4086 def _parse_table( 4087 self, 4088 schema: bool = False, 4089 joins: bool = False, 4090 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4091 parse_bracket: bool = False, 4092 is_db_reference: bool = False, 4093 parse_partition: bool = False, 4094 consume_pipe: bool = False, 4095 ) -> t.Optional[exp.Expression]: 4096 lateral = self._parse_lateral() 4097 if lateral: 4098 return lateral 4099 4100 unnest = self._parse_unnest() 4101 if unnest: 4102 return unnest 4103 4104 values = self._parse_derived_table_values() 4105 if values: 4106 return values 4107 4108 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4109 if subquery: 4110 if not subquery.args.get("pivots"): 4111 subquery.set("pivots", self._parse_pivots()) 4112 return subquery 4113 4114 bracket = parse_bracket and self._parse_bracket(None) 4115 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4116 4117 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4118 self._parse_table 4119 ) 4120 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4121 4122 only = self._match(TokenType.ONLY) 4123 4124 this = t.cast( 4125 exp.Expression, 4126 bracket 4127 or rows_from 4128 or self._parse_bracket( 4129 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4130 ), 4131 ) 4132 4133 if only: 4134 this.set("only", only) 4135 4136 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4137 self._match_text_seq("*") 4138 4139 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4140 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4141 this.set("partition", self._parse_partition()) 4142 4143 if schema: 4144 return self._parse_schema(this=this) 4145 4146 version = self._parse_version() 4147 4148 if version: 4149 this.set("version", version) 4150 4151 if self.dialect.ALIAS_POST_TABLESAMPLE: 4152 this.set("sample", self._parse_table_sample()) 4153 
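# Illustrative sketch (editorial addition, not from the upstream source): in
# ALIAS_POST_TABLESAMPLE dialects (e.g. Hive) the sample clause was consumed just
# above, before the alias parsed next; all other dialects take the sample branch
# further below, after the alias. For instance:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one(
#   ...     "SELECT * FROM t TABLESAMPLE (BUCKET 1 OUT OF 4 ON x) s", read="hive"
#   ... )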
4154 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4155 if alias: 4156 this.set("alias", alias) 4157 4158 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4159 return self.expression( 4160 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4161 ) 4162 4163 this.set("hints", self._parse_table_hints()) 4164 4165 if not this.args.get("pivots"): 4166 this.set("pivots", self._parse_pivots()) 4167 4168 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4169 this.set("sample", self._parse_table_sample()) 4170 4171 if joins: 4172 for join in self._parse_joins(): 4173 this.append("joins", join) 4174 4175 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4176 this.set("ordinality", True) 4177 this.set("alias", self._parse_table_alias()) 4178 4179 return this 4180 4181 def _parse_version(self) -> t.Optional[exp.Version]: 4182 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4183 this = "TIMESTAMP" 4184 elif self._match(TokenType.VERSION_SNAPSHOT): 4185 this = "VERSION" 4186 else: 4187 return None 4188 4189 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4190 kind = self._prev.text.upper() 4191 start = self._parse_bitwise() 4192 self._match_texts(("TO", "AND")) 4193 end = self._parse_bitwise() 4194 expression: t.Optional[exp.Expression] = self.expression( 4195 exp.Tuple, expressions=[start, end] 4196 ) 4197 elif self._match_text_seq("CONTAINED", "IN"): 4198 kind = "CONTAINED IN" 4199 expression = self.expression( 4200 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4201 ) 4202 elif self._match(TokenType.ALL): 4203 kind = "ALL" 4204 expression = None 4205 else: 4206 self._match_text_seq("AS", "OF") 4207 kind = "AS OF" 4208 expression = self._parse_type() 4209 4210 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4211 4212 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4213 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4214 index = self._index 4215 historical_data = None 4216 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4217 this = self._prev.text.upper() 4218 kind = ( 4219 self._match(TokenType.L_PAREN) 4220 and self._match_texts(self.HISTORICAL_DATA_KIND) 4221 and self._prev.text.upper() 4222 ) 4223 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4224 4225 if expression: 4226 self._match_r_paren() 4227 historical_data = self.expression( 4228 exp.HistoricalData, this=this, kind=kind, expression=expression 4229 ) 4230 else: 4231 self._retreat(index) 4232 4233 return historical_data 4234 4235 def _parse_changes(self) -> t.Optional[exp.Changes]: 4236 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4237 return None 4238 4239 information = self._parse_var(any_token=True) 4240 self._match_r_paren() 4241 4242 return self.expression( 4243 exp.Changes, 4244 information=information, 4245 at_before=self._parse_historical_data(), 4246 end=self._parse_historical_data(), 4247 ) 4248 4249 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4250 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4251 return None 4252 4253 self._advance() 4254 4255 expressions = self._parse_wrapped_csv(self._parse_equality) 4256 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4257 4258 alias = self._parse_table_alias() if with_alias else None 4259 4260 if alias: 4261 if self.dialect.UNNEST_COLUMN_ONLY: 4262 if alias.args.get("columns"): 4263 
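# Illustrative sketch (editorial addition, not from the upstream source): in
# UNNEST_COLUMN_ONLY dialects (e.g. BigQuery) the alias names the single column
# the UNNEST produces rather than a derived table, so supplying a column list as
# well is contradictory and is rejected here. Compare:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT v FROM UNNEST([1, 2]) AS v", read="bigquery")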
self.raise_error("Unexpected extra column alias in unnest.") 4264 4265 alias.set("columns", [alias.this]) 4266 alias.set("this", None) 4267 4268 columns = alias.args.get("columns") or [] 4269 if offset and len(expressions) < len(columns): 4270 offset = columns.pop() 4271 4272 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4273 self._match(TokenType.ALIAS) 4274 offset = self._parse_id_var( 4275 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4276 ) or exp.to_identifier("offset") 4277 4278 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4279 4280 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4281 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4282 if not is_derived and not ( 4283 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4284 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4285 ): 4286 return None 4287 4288 expressions = self._parse_csv(self._parse_value) 4289 alias = self._parse_table_alias() 4290 4291 if is_derived: 4292 self._match_r_paren() 4293 4294 return self.expression( 4295 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4296 ) 4297 4298 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4299 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4300 as_modifier and self._match_text_seq("USING", "SAMPLE") 4301 ): 4302 return None 4303 4304 bucket_numerator = None 4305 bucket_denominator = None 4306 bucket_field = None 4307 percent = None 4308 size = None 4309 seed = None 4310 4311 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4312 matched_l_paren = self._match(TokenType.L_PAREN) 4313 4314 if self.TABLESAMPLE_CSV: 4315 num = None 4316 expressions = self._parse_csv(self._parse_primary) 4317 else: 4318 expressions = None 4319 num = ( 4320 self._parse_factor() 4321 if self._match(TokenType.NUMBER, advance=False) 4322 else self._parse_primary() or self._parse_placeholder() 4323 ) 4324 4325 if self._match_text_seq("BUCKET"): 4326 bucket_numerator = self._parse_number() 4327 self._match_text_seq("OUT", "OF") 4328 bucket_denominator = bucket_denominator = self._parse_number() 4329 self._match(TokenType.ON) 4330 bucket_field = self._parse_field() 4331 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4332 percent = num 4333 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4334 size = num 4335 else: 4336 percent = num 4337 4338 if matched_l_paren: 4339 self._match_r_paren() 4340 4341 if self._match(TokenType.L_PAREN): 4342 method = self._parse_var(upper=True) 4343 seed = self._match(TokenType.COMMA) and self._parse_number() 4344 self._match_r_paren() 4345 elif self._match_texts(("SEED", "REPEATABLE")): 4346 seed = self._parse_wrapped(self._parse_number) 4347 4348 if not method and self.DEFAULT_SAMPLING_METHOD: 4349 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4350 4351 return self.expression( 4352 exp.TableSample, 4353 expressions=expressions, 4354 method=method, 4355 bucket_numerator=bucket_numerator, 4356 bucket_denominator=bucket_denominator, 4357 bucket_field=bucket_field, 4358 percent=percent, 4359 size=size, 4360 seed=seed, 4361 ) 4362 4363 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4364 return list(iter(self._parse_pivot, None)) or None 4365 4366 def _parse_joins(self) -> t.Iterator[exp.Join]: 4367 return iter(self._parse_join, None) 4368 4369 def _parse_unpivot_columns(self) -> 
t.Optional[exp.UnpivotColumns]: 4370 if not self._match(TokenType.INTO): 4371 return None 4372 4373 return self.expression( 4374 exp.UnpivotColumns, 4375 this=self._match_text_seq("NAME") and self._parse_column(), 4376 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4377 ) 4378 4379 # https://duckdb.org/docs/sql/statements/pivot 4380 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4381 def _parse_on() -> t.Optional[exp.Expression]: 4382 this = self._parse_bitwise() 4383 4384 if self._match(TokenType.IN): 4385 # PIVOT ... ON col IN (row_val1, row_val2) 4386 return self._parse_in(this) 4387 if self._match(TokenType.ALIAS, advance=False): 4388 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4389 return self._parse_alias(this) 4390 4391 return this 4392 4393 this = self._parse_table() 4394 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4395 into = self._parse_unpivot_columns() 4396 using = self._match(TokenType.USING) and self._parse_csv( 4397 lambda: self._parse_alias(self._parse_function()) 4398 ) 4399 group = self._parse_group() 4400 4401 return self.expression( 4402 exp.Pivot, 4403 this=this, 4404 expressions=expressions, 4405 using=using, 4406 group=group, 4407 unpivot=is_unpivot, 4408 into=into, 4409 ) 4410 4411 def _parse_pivot_in(self) -> exp.In: 4412 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4413 this = self._parse_select_or_expression() 4414 4415 self._match(TokenType.ALIAS) 4416 alias = self._parse_bitwise() 4417 if alias: 4418 if isinstance(alias, exp.Column) and not alias.db: 4419 alias = alias.this 4420 return self.expression(exp.PivotAlias, this=this, alias=alias) 4421 4422 return this 4423 4424 value = self._parse_column() 4425 4426 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4427 self.raise_error("Expecting IN (") 4428 4429 if self._match(TokenType.ANY): 4430 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4431 else: 4432 exprs = self._parse_csv(_parse_aliased_expression) 4433 4434 self._match_r_paren() 4435 return self.expression(exp.In, this=value, expressions=exprs) 4436 4437 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4438 func = self._parse_function() 4439 if not func: 4440 if self._prev and self._prev.token_type == TokenType.COMMA: 4441 return None 4442 self.raise_error("Expecting an aggregation function in PIVOT") 4443 4444 return self._parse_alias(func) 4445 4446 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4447 index = self._index 4448 include_nulls = None 4449 4450 if self._match(TokenType.PIVOT): 4451 unpivot = False 4452 elif self._match(TokenType.UNPIVOT): 4453 unpivot = True 4454 4455 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4456 if self._match_text_seq("INCLUDE", "NULLS"): 4457 include_nulls = True 4458 elif self._match_text_seq("EXCLUDE", "NULLS"): 4459 include_nulls = False 4460 else: 4461 return None 4462 4463 expressions = [] 4464 4465 if not self._match(TokenType.L_PAREN): 4466 self._retreat(index) 4467 return None 4468 4469 if unpivot: 4470 expressions = self._parse_csv(self._parse_column) 4471 else: 4472 expressions = self._parse_csv(self._parse_pivot_aggregation) 4473 4474 if not expressions: 4475 self.raise_error("Failed to parse PIVOT's aggregation list") 4476 4477 if not self._match(TokenType.FOR): 4478 self.raise_error("Expecting FOR") 4479 4480 fields = [] 4481 while True: 4482 field = 
self._try_parse(self._parse_pivot_in) 4483 if not field: 4484 break 4485 fields.append(field) 4486 4487 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4488 self._parse_bitwise 4489 ) 4490 4491 group = self._parse_group() 4492 4493 self._match_r_paren() 4494 4495 pivot = self.expression( 4496 exp.Pivot, 4497 expressions=expressions, 4498 fields=fields, 4499 unpivot=unpivot, 4500 include_nulls=include_nulls, 4501 default_on_null=default_on_null, 4502 group=group, 4503 ) 4504 4505 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4506 pivot.set("alias", self._parse_table_alias()) 4507 4508 if not unpivot: 4509 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4510 4511 columns: t.List[exp.Expression] = [] 4512 all_fields = [] 4513 for pivot_field in pivot.fields: 4514 pivot_field_expressions = pivot_field.expressions 4515 4516 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4517 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4518 continue 4519 4520 all_fields.append( 4521 [ 4522 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4523 for fld in pivot_field_expressions 4524 ] 4525 ) 4526 4527 if all_fields: 4528 if names: 4529 all_fields.append(names) 4530 4531 # Generate all possible combinations of the pivot columns 4532 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4533 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4534 for fld_parts_tuple in itertools.product(*all_fields): 4535 fld_parts = list(fld_parts_tuple) 4536 4537 if names and self.PREFIXED_PIVOT_COLUMNS: 4538 # Move the "name" to the front of the list 4539 fld_parts.insert(0, fld_parts.pop(-1)) 4540 4541 columns.append(exp.to_identifier("_".join(fld_parts))) 4542 4543 pivot.set("columns", columns) 4544 4545 return pivot 4546 4547 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4548 return [agg.alias for agg in aggregations if agg.alias] 4549 4550 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4551 if not skip_where_token and not self._match(TokenType.PREWHERE): 4552 return None 4553 4554 return self.expression( 4555 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4556 ) 4557 4558 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4559 if not skip_where_token and not self._match(TokenType.WHERE): 4560 return None 4561 4562 return self.expression( 4563 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4564 ) 4565 4566 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4567 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4568 return None 4569 comments = self._prev_comments 4570 4571 elements: t.Dict[str, t.Any] = defaultdict(list) 4572 4573 if self._match(TokenType.ALL): 4574 elements["all"] = True 4575 elif self._match(TokenType.DISTINCT): 4576 elements["all"] = False 4577 4578 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4579 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4580 4581 while True: 4582 index = self._index 4583 4584 elements["expressions"].extend( 4585 self._parse_csv( 4586 lambda: None 4587 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4588 else self._parse_assignment() 4589 ) 4590 ) 4591 4592 
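# Illustrative sketch (editorial addition, not from the upstream source): the
# branches below cover both the prefix forms GROUP BY ROLLUP (...) / CUBE (...) /
# GROUPING SETS (...) and MySQL's suffix form, e.g.:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY a WITH ROLLUP", read="mysql")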
before_with_index = self._index 4593 with_prefix = self._match(TokenType.WITH) 4594 4595 if self._match(TokenType.ROLLUP): 4596 elements["rollup"].append( 4597 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4598 ) 4599 elif self._match(TokenType.CUBE): 4600 elements["cube"].append( 4601 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4602 ) 4603 elif self._match(TokenType.GROUPING_SETS): 4604 elements["grouping_sets"].append( 4605 self.expression( 4606 exp.GroupingSets, 4607 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4608 ) 4609 ) 4610 elif self._match_text_seq("TOTALS"): 4611 elements["totals"] = True # type: ignore 4612 4613 if before_with_index <= self._index <= before_with_index + 1: 4614 self._retreat(before_with_index) 4615 break 4616 4617 if index == self._index: 4618 break 4619 4620 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4621 4622 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4623 return self.expression( 4624 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4625 ) 4626 4627 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4628 if self._match(TokenType.L_PAREN): 4629 grouping_set = self._parse_csv(self._parse_bitwise) 4630 self._match_r_paren() 4631 return self.expression(exp.Tuple, expressions=grouping_set) 4632 4633 return self._parse_column() 4634 4635 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4636 if not skip_having_token and not self._match(TokenType.HAVING): 4637 return None 4638 return self.expression( 4639 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4640 ) 4641 4642 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4643 if not self._match(TokenType.QUALIFY): 4644 return None 4645 return self.expression(exp.Qualify, this=self._parse_assignment()) 4646 4647 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4648 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4649 exp.Prior, this=self._parse_bitwise() 4650 ) 4651 connect = self._parse_assignment() 4652 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4653 return connect 4654 4655 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4656 if skip_start_token: 4657 start = None 4658 elif self._match(TokenType.START_WITH): 4659 start = self._parse_assignment() 4660 else: 4661 return None 4662 4663 self._match(TokenType.CONNECT_BY) 4664 nocycle = self._match_text_seq("NOCYCLE") 4665 connect = self._parse_connect_with_prior() 4666 4667 if not start and self._match(TokenType.START_WITH): 4668 start = self._parse_assignment() 4669 4670 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4671 4672 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4673 this = self._parse_id_var(any_token=True) 4674 if self._match(TokenType.ALIAS): 4675 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4676 return this 4677 4678 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4679 if self._match_text_seq("INTERPOLATE"): 4680 return self._parse_wrapped_csv(self._parse_name_as_expression) 4681 return None 4682 4683 def _parse_order( 4684 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4685 ) -> t.Optional[exp.Expression]: 4686 siblings = None 4687 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4688 if not 
self._match(TokenType.ORDER_SIBLINGS_BY): 4689 return this 4690 4691 siblings = True 4692 4693 return self.expression( 4694 exp.Order, 4695 comments=self._prev_comments, 4696 this=this, 4697 expressions=self._parse_csv(self._parse_ordered), 4698 siblings=siblings, 4699 ) 4700 4701 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4702 if not self._match(token): 4703 return None 4704 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4705 4706 def _parse_ordered( 4707 self, parse_method: t.Optional[t.Callable] = None 4708 ) -> t.Optional[exp.Ordered]: 4709 this = parse_method() if parse_method else self._parse_assignment() 4710 if not this: 4711 return None 4712 4713 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4714 this = exp.var("ALL") 4715 4716 asc = self._match(TokenType.ASC) 4717 desc = self._match(TokenType.DESC) or (asc and False) 4718 4719 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4720 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4721 4722 nulls_first = is_nulls_first or False 4723 explicitly_null_ordered = is_nulls_first or is_nulls_last 4724 4725 if ( 4726 not explicitly_null_ordered 4727 and ( 4728 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4729 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4730 ) 4731 and self.dialect.NULL_ORDERING != "nulls_are_last" 4732 ): 4733 nulls_first = True 4734 4735 if self._match_text_seq("WITH", "FILL"): 4736 with_fill = self.expression( 4737 exp.WithFill, 4738 **{ # type: ignore 4739 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4740 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4741 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4742 "interpolate": self._parse_interpolate(), 4743 }, 4744 ) 4745 else: 4746 with_fill = None 4747 4748 return self.expression( 4749 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4750 ) 4751 4752 def _parse_limit_options(self) -> exp.LimitOptions: 4753 percent = self._match(TokenType.PERCENT) 4754 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4755 self._match_text_seq("ONLY") 4756 with_ties = self._match_text_seq("WITH", "TIES") 4757 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4758 4759 def _parse_limit( 4760 self, 4761 this: t.Optional[exp.Expression] = None, 4762 top: bool = False, 4763 skip_limit_token: bool = False, 4764 ) -> t.Optional[exp.Expression]: 4765 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4766 comments = self._prev_comments 4767 if top: 4768 limit_paren = self._match(TokenType.L_PAREN) 4769 expression = self._parse_term() if limit_paren else self._parse_number() 4770 4771 if limit_paren: 4772 self._match_r_paren() 4773 4774 limit_options = self._parse_limit_options() 4775 else: 4776 limit_options = None 4777 expression = self._parse_term() 4778 4779 if self._match(TokenType.COMMA): 4780 offset = expression 4781 expression = self._parse_term() 4782 else: 4783 offset = None 4784 4785 limit_exp = self.expression( 4786 exp.Limit, 4787 this=this, 4788 expression=expression, 4789 offset=offset, 4790 comments=comments, 4791 limit_options=limit_options, 4792 expressions=self._parse_limit_by(), 4793 ) 4794 4795 return limit_exp 4796 4797 if self._match(TokenType.FETCH): 4798 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4799 direction = self._prev.text.upper() if direction else 
"FIRST" 4800 4801 count = self._parse_field(tokens=self.FETCH_TOKENS) 4802 4803 return self.expression( 4804 exp.Fetch, 4805 direction=direction, 4806 count=count, 4807 limit_options=self._parse_limit_options(), 4808 ) 4809 4810 return this 4811 4812 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4813 if not self._match(TokenType.OFFSET): 4814 return this 4815 4816 count = self._parse_term() 4817 self._match_set((TokenType.ROW, TokenType.ROWS)) 4818 4819 return self.expression( 4820 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4821 ) 4822 4823 def _can_parse_limit_or_offset(self) -> bool: 4824 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4825 return False 4826 4827 index = self._index 4828 result = bool( 4829 self._try_parse(self._parse_limit, retreat=True) 4830 or self._try_parse(self._parse_offset, retreat=True) 4831 ) 4832 self._retreat(index) 4833 return result 4834 4835 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4836 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4837 4838 def _parse_locks(self) -> t.List[exp.Lock]: 4839 locks = [] 4840 while True: 4841 update, key = None, None 4842 if self._match_text_seq("FOR", "UPDATE"): 4843 update = True 4844 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4845 "LOCK", "IN", "SHARE", "MODE" 4846 ): 4847 update = False 4848 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4849 update, key = False, True 4850 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4851 update, key = True, True 4852 else: 4853 break 4854 4855 expressions = None 4856 if self._match_text_seq("OF"): 4857 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4858 4859 wait: t.Optional[bool | exp.Expression] = None 4860 if self._match_text_seq("NOWAIT"): 4861 wait = True 4862 elif self._match_text_seq("WAIT"): 4863 wait = self._parse_primary() 4864 elif self._match_text_seq("SKIP", "LOCKED"): 4865 wait = False 4866 4867 locks.append( 4868 self.expression( 4869 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4870 ) 4871 ) 4872 4873 return locks 4874 4875 def parse_set_operation( 4876 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4877 ) -> t.Optional[exp.Expression]: 4878 start = self._index 4879 _, side_token, kind_token = self._parse_join_parts() 4880 4881 side = side_token.text if side_token else None 4882 kind = kind_token.text if kind_token else None 4883 4884 if not self._match_set(self.SET_OPERATIONS): 4885 self._retreat(start) 4886 return None 4887 4888 token_type = self._prev.token_type 4889 4890 if token_type == TokenType.UNION: 4891 operation: t.Type[exp.SetOperation] = exp.Union 4892 elif token_type == TokenType.EXCEPT: 4893 operation = exp.Except 4894 else: 4895 operation = exp.Intersect 4896 4897 comments = self._prev.comments 4898 4899 if self._match(TokenType.DISTINCT): 4900 distinct: t.Optional[bool] = True 4901 elif self._match(TokenType.ALL): 4902 distinct = False 4903 else: 4904 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4905 if distinct is None: 4906 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4907 4908 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4909 "STRICT", "CORRESPONDING" 4910 ) 4911 if self._match_text_seq("CORRESPONDING"): 4912 by_name = True 4913 if not side and not kind: 4914 kind = "INNER" 4915 4916 on_column_list = None 4917 if by_name and 
self._match_texts(("ON", "BY")): 4918 on_column_list = self._parse_wrapped_csv(self._parse_column) 4919 4920 expression = self._parse_select( 4921 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4922 ) 4923 4924 return self.expression( 4925 operation, 4926 comments=comments, 4927 this=this, 4928 distinct=distinct, 4929 by_name=by_name, 4930 expression=expression, 4931 side=side, 4932 kind=kind, 4933 on=on_column_list, 4934 ) 4935 4936 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4937 while this: 4938 setop = self.parse_set_operation(this) 4939 if not setop: 4940 break 4941 this = setop 4942 4943 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4944 expression = this.expression 4945 4946 if expression: 4947 for arg in self.SET_OP_MODIFIERS: 4948 expr = expression.args.get(arg) 4949 if expr: 4950 this.set(arg, expr.pop()) 4951 4952 return this 4953 4954 def _parse_expression(self) -> t.Optional[exp.Expression]: 4955 return self._parse_alias(self._parse_assignment()) 4956 4957 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4958 this = self._parse_disjunction() 4959 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4960 # This allows us to parse <non-identifier token> := <expr> 4961 this = exp.column( 4962 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4963 ) 4964 4965 while self._match_set(self.ASSIGNMENT): 4966 if isinstance(this, exp.Column) and len(this.parts) == 1: 4967 this = this.this 4968 4969 this = self.expression( 4970 self.ASSIGNMENT[self._prev.token_type], 4971 this=this, 4972 comments=self._prev_comments, 4973 expression=self._parse_assignment(), 4974 ) 4975 4976 return this 4977 4978 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4979 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4980 4981 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4982 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4983 4984 def _parse_equality(self) -> t.Optional[exp.Expression]: 4985 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4986 4987 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4988 return self._parse_tokens(self._parse_range, self.COMPARISON) 4989 4990 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4991 this = this or self._parse_bitwise() 4992 negate = self._match(TokenType.NOT) 4993 4994 if self._match_set(self.RANGE_PARSERS): 4995 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4996 if not expression: 4997 return this 4998 4999 this = expression 5000 elif self._match(TokenType.ISNULL): 5001 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5002 5003 # Postgres supports ISNULL and NOTNULL for conditions. 
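# Illustrative sketch (editorial addition, not from the upstream source): both
# postfix operators normalize to the standard IS [NOT] NULL shape, roughly:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT a NOTNULL", read="postgres").sql()
#   'SELECT NOT a IS NULL'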
5004 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5005 if self._match(TokenType.NOTNULL): 5006 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5007 this = self.expression(exp.Not, this=this) 5008 5009 if negate: 5010 this = self._negate_range(this) 5011 5012 if self._match(TokenType.IS): 5013 this = self._parse_is(this) 5014 5015 return this 5016 5017 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5018 if not this: 5019 return this 5020 5021 return self.expression(exp.Not, this=this) 5022 5023 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5024 index = self._index - 1 5025 negate = self._match(TokenType.NOT) 5026 5027 if self._match_text_seq("DISTINCT", "FROM"): 5028 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5029 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5030 5031 if self._match(TokenType.JSON): 5032 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5033 5034 if self._match_text_seq("WITH"): 5035 _with = True 5036 elif self._match_text_seq("WITHOUT"): 5037 _with = False 5038 else: 5039 _with = None 5040 5041 unique = self._match(TokenType.UNIQUE) 5042 self._match_text_seq("KEYS") 5043 expression: t.Optional[exp.Expression] = self.expression( 5044 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5045 ) 5046 else: 5047 expression = self._parse_primary() or self._parse_null() 5048 if not expression: 5049 self._retreat(index) 5050 return None 5051 5052 this = self.expression(exp.Is, this=this, expression=expression) 5053 return self.expression(exp.Not, this=this) if negate else this 5054 5055 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5056 unnest = self._parse_unnest(with_alias=False) 5057 if unnest: 5058 this = self.expression(exp.In, this=this, unnest=unnest) 5059 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5060 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5061 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5062 5063 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5064 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5065 else: 5066 this = self.expression(exp.In, this=this, expressions=expressions) 5067 5068 if matched_l_paren: 5069 self._match_r_paren(this) 5070 elif not self._match(TokenType.R_BRACKET, expression=this): 5071 self.raise_error("Expecting ]") 5072 else: 5073 this = self.expression(exp.In, this=this, field=self._parse_column()) 5074 5075 return this 5076 5077 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5078 symmetric = None 5079 if self._match_text_seq("SYMMETRIC"): 5080 symmetric = True 5081 elif self._match_text_seq("ASYMMETRIC"): 5082 symmetric = False 5083 5084 low = self._parse_bitwise() 5085 self._match(TokenType.AND) 5086 high = self._parse_bitwise() 5087 5088 return self.expression( 5089 exp.Between, 5090 this=this, 5091 low=low, 5092 high=high, 5093 symmetric=symmetric, 5094 ) 5095 5096 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5097 if not self._match(TokenType.ESCAPE): 5098 return this 5099 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5100 5101 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5102 index = self._index 5103 5104 if not 
self._match(TokenType.INTERVAL) and match_interval: 5105 return None 5106 5107 if self._match(TokenType.STRING, advance=False): 5108 this = self._parse_primary() 5109 else: 5110 this = self._parse_term() 5111 5112 if not this or ( 5113 isinstance(this, exp.Column) 5114 and not this.table 5115 and not this.this.quoted 5116 and self._curr 5117 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5118 ): 5119 self._retreat(index) 5120 return None 5121 5122 # handle day-time format interval span with omitted units: 5123 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5124 interval_span_units_omitted = None 5125 if ( 5126 this 5127 and this.is_string 5128 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5129 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5130 ): 5131 index = self._index 5132 5133 # Var "TO" Var 5134 first_unit = self._parse_var(any_token=True, upper=True) 5135 second_unit = None 5136 if first_unit and self._match_text_seq("TO"): 5137 second_unit = self._parse_var(any_token=True, upper=True) 5138 5139 interval_span_units_omitted = not (first_unit and second_unit) 5140 5141 self._retreat(index) 5142 5143 unit = ( 5144 None 5145 if interval_span_units_omitted 5146 else ( 5147 self._parse_function() 5148 or ( 5149 not self._match(TokenType.ALIAS, advance=False) 5150 and self._parse_var(any_token=True, upper=True) 5151 ) 5152 ) 5153 ) 5154 5155 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5156 # each INTERVAL expression into this canonical form so it's easy to transpile 5157 if this and this.is_number: 5158 this = exp.Literal.string(this.to_py()) 5159 elif this and this.is_string: 5160 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5161 if parts and unit: 5162 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5163 unit = None 5164 self._retreat(self._index - 1) 5165 5166 if len(parts) == 1: 5167 this = exp.Literal.string(parts[0][0]) 5168 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5169 5170 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5171 unit = self.expression( 5172 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5173 ) 5174 5175 interval = self.expression(exp.Interval, this=this, unit=unit) 5176 5177 index = self._index 5178 self._match(TokenType.PLUS) 5179 5180 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals
5181 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
5182 return self.expression(
5183 exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
5184 )
5185
5186 self._retreat(index)
5187 return interval
5188
5189 def _parse_bitwise(self) -> t.Optional[exp.Expression]:
5190 this = self._parse_term()
5191
5192 while True:
5193 if self._match_set(self.BITWISE):
5194 this = self.expression(
5195 self.BITWISE[self._prev.token_type],
5196 this=this,
5197 expression=self._parse_term(),
5198 )
5199 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
5200 this = self.expression(
5201 exp.DPipe,
5202 this=this,
5203 expression=self._parse_term(),
5204 safe=not self.dialect.STRICT_STRING_CONCAT,
5205 )
5206 elif self._match(TokenType.DQMARK):
5207 this = self.expression(
5208 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
5209 )
5210 elif self._match_pair(TokenType.LT, TokenType.LT):
5211 this = self.expression(
5212 exp.BitwiseLeftShift, this=this, expression=self._parse_term()
5213 )
5214 elif self._match_pair(TokenType.GT, TokenType.GT):
5215 this = self.expression(
5216 exp.BitwiseRightShift, this=this, expression=self._parse_term()
5217 )
5218 else:
5219 break
5220
5221 return this
5222
5223 def _parse_term(self) -> t.Optional[exp.Expression]:
5224 this = self._parse_factor()
5225
5226 while self._match_set(self.TERM):
5227 klass = self.TERM[self._prev.token_type]
5228 comments = self._prev_comments
5229 expression = self._parse_factor()
5230
5231 this = self.expression(klass, this=this, comments=comments, expression=expression)
5232
5233 if isinstance(this, exp.Collate):
5234 expr = this.expression
5235
5236 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
5237 # fall back to Identifier / Var
5238 if isinstance(expr, exp.Column) and len(expr.parts) == 1:
5239 ident = expr.this
5240 if isinstance(ident, exp.Identifier):
5241 this.set("expression", ident if ident.quoted else exp.var(ident.name))
5242
5243 return this
5244
5245 def _parse_factor(self) -> t.Optional[exp.Expression]:
5246 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
5247 this = parse_method()
5248
5249 while self._match_set(self.FACTOR):
5250 klass = self.FACTOR[self._prev.token_type]
5251 comments = self._prev_comments
5252 expression = parse_method()
5253
5254 if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
5255 self._retreat(self._index - 1)
5256 return this
5257
5258 this = self.expression(klass, this=this, comments=comments, expression=expression)
5259
5260 if isinstance(this, exp.Div):
5261 this.args["typed"] = self.dialect.TYPED_DIVISION
5262 this.args["safe"] = self.dialect.SAFE_DIVISION
5263
5264 return this
5265
5266 def _parse_exponent(self) -> t.Optional[exp.Expression]:
5267 return self._parse_tokens(self._parse_unary, self.EXPONENT)
5268
5269 def _parse_unary(self) -> t.Optional[exp.Expression]:
5270 if self._match_set(self.UNARY_PARSERS):
5271 return self.UNARY_PARSERS[self._prev.token_type](self)
5272 return self._parse_at_time_zone(self._parse_type())
5273
5274 def _parse_type(
5275 self, parse_interval: bool = True, fallback_to_identifier: bool = False
5276 ) -> t.Optional[exp.Expression]:
5277 interval = parse_interval and self._parse_interval()
5278 if interval:
5279 return interval
5280
5281 index = self._index
5282 data_type = self._parse_types(check_func=True, allow_identifiers=False)
5283
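# Illustrative example (assumed input, not from the source): for DATE '2020-01-01',
# _parse_types consumes the DATE keyword, _parse_primary below picks up the string
# literal, and the result is canonicalized to CAST('2020-01-01' AS DATE), unless a
# TYPE_LITERAL_PARSERS entry builds a more specific node for that type.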
5284 # _parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
5285 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
5286 if isinstance(data_type, exp.Cast):
5287 # This constructor can contain ops directly after it, for instance struct unnesting:
5288 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
5289 return self._parse_column_ops(data_type)
5290
5291 if data_type:
5292 index2 = self._index
5293 this = self._parse_primary()
5294
5295 if isinstance(this, exp.Literal):
5296 literal = this.name
5297 this = self._parse_column_ops(this)
5298
5299 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
5300 if parser:
5301 return parser(self, this, data_type)
5302
5303 if (
5304 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
5305 and data_type.is_type(exp.DataType.Type.TIMESTAMP)
5306 and TIME_ZONE_RE.search(literal)
5307 ):
5308 data_type = exp.DataType.build("TIMESTAMPTZ")
5309
5310 return self.expression(exp.Cast, this=this, to=data_type)
5311
5312 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
5313 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
5314 #
5315 # If the index difference here is greater than 1, that means the parser itself must have
5316 # consumed additional tokens such as the DECIMAL scale and precision in the above example.
5317 #
5318 # If it's not greater than 1, then it must be 1, because we've consumed at least the type
5319 # keyword, meaning that the expressions arg of the DataType must have gotten set by a
5320 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
5321 # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
5322 #
5323 # In these cases, we don't really want to return the converted type, but instead retreat
5324 # and try to parse a Column or Identifier in the section below.
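# Concrete illustration (hypothetical dialect setup): if a TYPE_CONVERTERS entry
# expands DECIMAL into DECIMAL(38, 0), parsing the bare input "decimal" consumes a
# single token, so index2 - index == 1 and we fall through to the retreat below,
# re-parsing the token as a column or identifier instead of a data type.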
5325 if data_type.expressions and index2 - index > 1: 5326 self._retreat(index2) 5327 return self._parse_column_ops(data_type) 5328 5329 self._retreat(index) 5330 5331 if fallback_to_identifier: 5332 return self._parse_id_var() 5333 5334 this = self._parse_column() 5335 return this and self._parse_column_ops(this) 5336 5337 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5338 this = self._parse_type() 5339 if not this: 5340 return None 5341 5342 if isinstance(this, exp.Column) and not this.table: 5343 this = exp.var(this.name.upper()) 5344 5345 return self.expression( 5346 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5347 ) 5348 5349 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5350 type_name = identifier.name 5351 5352 while self._match(TokenType.DOT): 5353 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5354 5355 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5356 5357 def _parse_types( 5358 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5359 ) -> t.Optional[exp.Expression]: 5360 index = self._index 5361 5362 this: t.Optional[exp.Expression] = None 5363 prefix = self._match_text_seq("SYSUDTLIB", ".") 5364 5365 if self._match_set(self.TYPE_TOKENS): 5366 type_token = self._prev.token_type 5367 else: 5368 type_token = None 5369 identifier = allow_identifiers and self._parse_id_var( 5370 any_token=False, tokens=(TokenType.VAR,) 5371 ) 5372 if isinstance(identifier, exp.Identifier): 5373 try: 5374 tokens = self.dialect.tokenize(identifier.name) 5375 except TokenError: 5376 tokens = None 5377 5378 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5379 type_token = tokens[0].token_type 5380 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5381 this = self._parse_user_defined_type(identifier) 5382 else: 5383 self._retreat(self._index - 1) 5384 return None 5385 else: 5386 return None 5387 5388 if type_token == TokenType.PSEUDO_TYPE: 5389 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5390 5391 if type_token == TokenType.OBJECT_IDENTIFIER: 5392 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5393 5394 # https://materialize.com/docs/sql/types/map/ 5395 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5396 key_type = self._parse_types( 5397 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5398 ) 5399 if not self._match(TokenType.FARROW): 5400 self._retreat(index) 5401 return None 5402 5403 value_type = self._parse_types( 5404 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5405 ) 5406 if not self._match(TokenType.R_BRACKET): 5407 self._retreat(index) 5408 return None 5409 5410 return exp.DataType( 5411 this=exp.DataType.Type.MAP, 5412 expressions=[key_type, value_type], 5413 nested=True, 5414 prefix=prefix, 5415 ) 5416 5417 nested = type_token in self.NESTED_TYPE_TOKENS 5418 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5419 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5420 expressions = None 5421 maybe_func = False 5422 5423 if self._match(TokenType.L_PAREN): 5424 if is_struct: 5425 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5426 elif nested: 5427 expressions = self._parse_csv( 5428 lambda: self._parse_types( 5429 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5430 ) 5431 ) 5432 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5433 this = expressions[0] 5434 this.set("nullable", True) 5435 self._match_r_paren() 5436 return this 5437 elif type_token in self.ENUM_TYPE_TOKENS: 5438 expressions = self._parse_csv(self._parse_equality) 5439 elif is_aggregate: 5440 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5441 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5442 ) 5443 if not func_or_ident: 5444 return None 5445 expressions = [func_or_ident] 5446 if self._match(TokenType.COMMA): 5447 expressions.extend( 5448 self._parse_csv( 5449 lambda: self._parse_types( 5450 check_func=check_func, 5451 schema=schema, 5452 allow_identifiers=allow_identifiers, 5453 ) 5454 ) 5455 ) 5456 else: 5457 expressions = self._parse_csv(self._parse_type_size) 5458 5459 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5460 if type_token == TokenType.VECTOR and len(expressions) == 2: 5461 expressions = self._parse_vector_expressions(expressions) 5462 5463 if not self._match(TokenType.R_PAREN): 5464 self._retreat(index) 5465 return None 5466 5467 maybe_func = True 5468 5469 values: t.Optional[t.List[exp.Expression]] = None 5470 5471 if nested and self._match(TokenType.LT): 5472 if is_struct: 5473 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5474 else: 5475 expressions = self._parse_csv( 5476 lambda: self._parse_types( 5477 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5478 ) 5479 ) 5480 5481 if not self._match(TokenType.GT): 5482 self.raise_error("Expecting >") 5483 5484 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5485 values = self._parse_csv(self._parse_assignment) 5486 if not values and is_struct: 5487 values = None 5488 self._retreat(self._index - 1) 5489 else: 5490 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5491 5492 if type_token in self.TIMESTAMPS: 5493 if self._match_text_seq("WITH", "TIME", "ZONE"): 5494 maybe_func = False 5495 tz_type = ( 5496 exp.DataType.Type.TIMETZ 5497 if type_token in self.TIMES 5498 else exp.DataType.Type.TIMESTAMPTZ 5499 ) 5500 this = exp.DataType(this=tz_type, expressions=expressions) 5501 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5502 maybe_func = False 5503 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5504 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5505 maybe_func = False 5506 elif type_token == TokenType.INTERVAL: 5507 unit = self._parse_var(upper=True) 5508 if unit: 5509 if self._match_text_seq("TO"): 5510 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5511 5512 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5513 else: 5514 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5515 elif type_token == TokenType.VOID: 5516 this = exp.DataType(this=exp.DataType.Type.NULL) 5517 5518 if maybe_func and check_func: 5519 index2 = self._index 5520 peek = self._parse_string() 5521 5522 if not peek: 5523 self._retreat(index) 5524 return None 5525 5526 self._retreat(index2) 5527 5528 if not this: 5529 if self._match_text_seq("UNSIGNED"): 5530 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5531 if not unsigned_type_token: 5532 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5533 5534 type_token = unsigned_type_token or type_token 5535 5536 # NULLABLE without parentheses can be a column (Presto/Trino) 5537 if type_token == 
TokenType.NULLABLE and not expressions: 5538 self._retreat(index) 5539 return None 5540 5541 this = exp.DataType( 5542 this=exp.DataType.Type[type_token.value], 5543 expressions=expressions, 5544 nested=nested, 5545 prefix=prefix, 5546 ) 5547 5548 # Empty arrays/structs are allowed 5549 if values is not None: 5550 cls = exp.Struct if is_struct else exp.Array 5551 this = exp.cast(cls(expressions=values), this, copy=False) 5552 5553 elif expressions: 5554 this.set("expressions", expressions) 5555 5556 # https://materialize.com/docs/sql/types/list/#type-name 5557 while self._match(TokenType.LIST): 5558 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5559 5560 index = self._index 5561 5562 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5563 matched_array = self._match(TokenType.ARRAY) 5564 5565 while self._curr: 5566 datatype_token = self._prev.token_type 5567 matched_l_bracket = self._match(TokenType.L_BRACKET) 5568 5569 if (not matched_l_bracket and not matched_array) or ( 5570 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5571 ): 5572 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5573 # not to be confused with the fixed size array parsing 5574 break 5575 5576 matched_array = False 5577 values = self._parse_csv(self._parse_assignment) or None 5578 if ( 5579 values 5580 and not schema 5581 and ( 5582 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5583 ) 5584 ): 5585 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5586 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5587 self._retreat(index) 5588 break 5589 5590 this = exp.DataType( 5591 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5592 ) 5593 self._match(TokenType.R_BRACKET) 5594 5595 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5596 converter = self.TYPE_CONVERTERS.get(this.this) 5597 if converter: 5598 this = converter(t.cast(exp.DataType, this)) 5599 5600 return this 5601 5602 def _parse_vector_expressions( 5603 self, expressions: t.List[exp.Expression] 5604 ) -> t.List[exp.Expression]: 5605 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5606 5607 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5608 index = self._index 5609 5610 if ( 5611 self._curr 5612 and self._next 5613 and self._curr.token_type in self.TYPE_TOKENS 5614 and self._next.token_type in self.TYPE_TOKENS 5615 ): 5616 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5617 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5618 this = self._parse_id_var() 5619 else: 5620 this = ( 5621 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5622 or self._parse_id_var() 5623 ) 5624 5625 self._match(TokenType.COLON) 5626 5627 if ( 5628 type_required 5629 and not isinstance(this, exp.DataType) 5630 and not self._match_set(self.TYPE_TOKENS, advance=False) 5631 ): 5632 self._retreat(index) 5633 return self._parse_types() 5634 5635 return self._parse_column_def(this) 5636 5637 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5638 if not self._match_text_seq("AT", "TIME", "ZONE"): 5639 return this 5640 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5641 5642 def _parse_column(self) -> t.Optional[exp.Expression]: 5643 this = self._parse_column_reference() 5644 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5645 5646 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5647 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5648 5649 return column 5650 5651 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5652 this = self._parse_field() 5653 if ( 5654 not this 5655 and self._match(TokenType.VALUES, advance=False) 5656 and self.VALUES_FOLLOWED_BY_PAREN 5657 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5658 ): 5659 this = self._parse_id_var() 5660 5661 if isinstance(this, exp.Identifier): 5662 # We bubble up comments from the Identifier to the Column 5663 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5664 5665 return this 5666 5667 def _parse_colon_as_variant_extract( 5668 self, this: t.Optional[exp.Expression] 5669 ) -> t.Optional[exp.Expression]: 5670 casts = [] 5671 json_path = [] 5672 escape = None 5673 5674 while self._match(TokenType.COLON): 5675 start_index = self._index 5676 5677 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5678 path = self._parse_column_ops( 5679 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5680 ) 5681 5682 # The cast :: operator has a lower precedence than the extraction operator :, so 5683 # we rearrange the AST appropriately to avoid casting the JSON path 5684 while isinstance(path, exp.Cast): 5685 casts.append(path.to) 5686 path = path.this 5687 5688 if casts: 5689 dcolon_offset = next( 5690 i 5691 for i, t in enumerate(self._tokens[start_index:]) 5692 if t.token_type == TokenType.DCOLON 5693 ) 5694 end_token = self._tokens[start_index + dcolon_offset - 1] 5695 else: 5696 end_token = self._prev 5697 5698 if path: 5699 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5700 # it'll roundtrip to a string literal in GET_PATH 5701 if isinstance(path, exp.Identifier) and path.quoted: 5702 escape = True 5703 5704 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5705 5706 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5707 # Databricks transforms it back to the colon/dot notation 5708 if json_path: 5709 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5710 5711 if json_path_expr: 5712 json_path_expr.set("escape", escape) 5713 5714 this = self.expression( 5715 exp.JSONExtract, 5716 this=this, 5717 expression=json_path_expr, 5718 variant_extract=True, 5719 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5720 ) 5721 5722 while casts: 5723 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5724 5725 return this 5726 5727 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5728 return self._parse_types() 5729 5730 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5731 this = self._parse_bracket(this) 5732 5733 while self._match_set(self.COLUMN_OPERATORS): 5734 op_token = self._prev.token_type 5735 op = self.COLUMN_OPERATORS.get(op_token) 5736 5737 if op_token in self.CAST_COLUMN_OPERATORS: 5738 field = self._parse_dcolon() 5739 if not field: 5740 self.raise_error("Expected type") 5741 elif op and self._curr: 5742 field = self._parse_column_reference() or self._parse_bitwise() 5743 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5744 field = self._parse_column_ops(field) 5745 else: 5746 field = self._parse_field(any_token=True, anonymous_func=True) 5747 5748 # Function calls can be qualified, e.g., x.y.FOO() 5749 # This converts the final AST to a series of Dots leading to the function call 5750 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5751 if isinstance(field, (exp.Func, exp.Window)) and this: 5752 this = this.transform( 5753 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5754 ) 5755 5756 if op: 5757 this = op(self, this, field) 5758 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5759 this = self.expression( 5760 exp.Column, 5761 comments=this.comments, 5762 this=field, 5763 table=this.this, 5764 db=this.args.get("table"), 5765 catalog=this.args.get("db"), 5766 ) 5767 elif isinstance(field, exp.Window): 5768 # Move the exp.Dot's to the window's function 5769 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5770 field.set("this", window_func) 5771 this = field 5772 else: 5773 this = self.expression(exp.Dot, this=this, expression=field) 5774 5775 if field and field.comments: 5776 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5777 5778 this = self._parse_bracket(this) 5779 5780 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5781 5782 def _parse_paren(self) -> t.Optional[exp.Expression]: 5783 if not self._match(TokenType.L_PAREN): 5784 return None 5785 5786 comments = self._prev_comments 5787 query = self._parse_select() 5788 5789 if query: 5790 expressions = [query] 5791 else: 5792 expressions = self._parse_expressions() 5793 5794 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5795 5796 if not this and self._match(TokenType.R_PAREN, advance=False): 5797 this = self.expression(exp.Tuple) 5798 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5799 this = self._parse_subquery(this=this, parse_alias=False) 5800 elif isinstance(this, exp.Subquery): 5801 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5802 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5803 this = self.expression(exp.Tuple, expressions=expressions) 5804 else: 5805 this = self.expression(exp.Paren, this=this) 5806 5807 if this: 5808 this.add_comments(comments) 5809 5810 self._match_r_paren(expression=this) 5811 5812 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 5813 return self._parse_window(this) 5814 5815 return this 5816 5817 def _parse_primary(self) -> t.Optional[exp.Expression]: 5818 if self._match_set(self.PRIMARY_PARSERS): 5819 token_type = self._prev.token_type 5820 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5821 5822 if token_type == TokenType.STRING: 5823 expressions = [primary] 5824 while self._match(TokenType.STRING): 5825 expressions.append(exp.Literal.string(self._prev.text)) 5826 5827 if len(expressions) > 1: 5828 return self.expression(exp.Concat, expressions=expressions) 5829 5830 return primary 5831 5832 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5833 return exp.Literal.number(f"0.{self._prev.text}") 5834 5835 return self._parse_paren() 5836 5837 def _parse_field( 5838 self, 5839 any_token: bool = False, 5840 tokens: t.Optional[t.Collection[TokenType]] = None, 5841 anonymous_func: bool = False, 5842 ) -> t.Optional[exp.Expression]: 5843 if anonymous_func: 5844 field = ( 5845 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5846 or self._parse_primary() 5847 ) 5848 else: 5849 field = self._parse_primary() or self._parse_function( 5850 anonymous=anonymous_func, any_token=any_token 5851 ) 5852 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5853 5854 def _parse_function( 5855 self, 5856 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5857 anonymous: bool = False, 5858 optional_parens: bool = True, 5859 any_token: bool = False, 5860 ) -> t.Optional[exp.Expression]: 5861 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5862 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5863 fn_syntax = False 5864 if ( 5865 self._match(TokenType.L_BRACE, advance=False) 5866 and self._next 5867 and self._next.text.upper() == "FN" 5868 ): 5869 self._advance(2) 5870 fn_syntax = True 5871 5872 func = self._parse_function_call( 5873 functions=functions, 5874 anonymous=anonymous, 5875 optional_parens=optional_parens, 5876 any_token=any_token, 5877 ) 5878 5879 if fn_syntax: 5880 self._match(TokenType.R_BRACE) 5881 5882 return func 5883 5884 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5885 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5886 5887 def _parse_function_call( 5888 self, 5889 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5890 anonymous: bool = False, 5891 optional_parens: bool = True, 5892 any_token: bool = False, 5893 ) -> t.Optional[exp.Expression]: 5894 if not self._curr: 5895 return None 5896 5897 comments = self._curr.comments 5898 prev = self._prev 5899 token = self._curr 5900 token_type = self._curr.token_type 5901 this = self._curr.text 5902 upper = this.upper() 5903 5904 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5905 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5906 self._advance() 5907 return 
self._parse_window(parser(self)) 5908 5909 if not self._next or self._next.token_type != TokenType.L_PAREN: 5910 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5911 self._advance() 5912 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5913 5914 return None 5915 5916 if any_token: 5917 if token_type in self.RESERVED_TOKENS: 5918 return None 5919 elif token_type not in self.FUNC_TOKENS: 5920 return None 5921 5922 self._advance(2) 5923 5924 parser = self.FUNCTION_PARSERS.get(upper) 5925 if parser and not anonymous: 5926 this = parser(self) 5927 else: 5928 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5929 5930 if subquery_predicate: 5931 expr = None 5932 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5933 expr = self._parse_select() 5934 self._match_r_paren() 5935 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5936 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5937 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5938 self._advance(-1) 5939 expr = self._parse_bitwise() 5940 5941 if expr: 5942 return self.expression(subquery_predicate, comments=comments, this=expr) 5943 5944 if functions is None: 5945 functions = self.FUNCTIONS 5946 5947 function = functions.get(upper) 5948 known_function = function and not anonymous 5949 5950 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5951 args = self._parse_function_args(alias) 5952 5953 post_func_comments = self._curr and self._curr.comments 5954 if known_function and post_func_comments: 5955 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5956 # call we'll construct it as exp.Anonymous, even if it's "known" 5957 if any( 5958 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5959 for comment in post_func_comments 5960 ): 5961 known_function = False 5962 5963 if alias and known_function: 5964 args = self._kv_to_prop_eq(args) 5965 5966 if known_function: 5967 func_builder = t.cast(t.Callable, function) 5968 5969 if "dialect" in func_builder.__code__.co_varnames: 5970 func = func_builder(args, dialect=self.dialect) 5971 else: 5972 func = func_builder(args) 5973 5974 func = self.validate_expression(func, args) 5975 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5976 func.meta["name"] = this 5977 5978 this = func 5979 else: 5980 if token_type == TokenType.IDENTIFIER: 5981 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5982 5983 this = self.expression(exp.Anonymous, this=this, expressions=args) 5984 this = this.update_positions(token) 5985 5986 if isinstance(this, exp.Expression): 5987 this.add_comments(comments) 5988 5989 self._match_r_paren(this) 5990 return self._parse_window(this) 5991 5992 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5993 return expression 5994 5995 def _kv_to_prop_eq( 5996 self, expressions: t.List[exp.Expression], parse_map: bool = False 5997 ) -> t.List[exp.Expression]: 5998 transformed = [] 5999 6000 for index, e in enumerate(expressions): 6001 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6002 if isinstance(e, exp.Alias): 6003 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 6004 6005 if not isinstance(e, exp.PropertyEQ): 6006 e = self.expression( 6007 exp.PropertyEQ, 6008 this=e.this if parse_map else exp.to_identifier(e.this.name), 6009 expression=e.expression, 6010 ) 6011 6012 if isinstance(e.this, exp.Column): 6013 e.this.replace(e.this.this) 
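# Illustrative: a struct-like argument such as `1 AS a` (exp.Alias) or `a := 1`
# (exp.PropertyEQ) ends up normalized above as PropertyEQ(this=a, expression=1)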
6014 else: 6015 e = self._to_prop_eq(e, index) 6016 6017 transformed.append(e) 6018 6019 return transformed 6020 6021 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6022 return self._parse_statement() 6023 6024 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6025 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6026 6027 def _parse_user_defined_function( 6028 self, kind: t.Optional[TokenType] = None 6029 ) -> t.Optional[exp.Expression]: 6030 this = self._parse_table_parts(schema=True) 6031 6032 if not self._match(TokenType.L_PAREN): 6033 return this 6034 6035 expressions = self._parse_csv(self._parse_function_parameter) 6036 self._match_r_paren() 6037 return self.expression( 6038 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6039 ) 6040 6041 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6042 literal = self._parse_primary() 6043 if literal: 6044 return self.expression(exp.Introducer, this=token.text, expression=literal) 6045 6046 return self._identifier_expression(token) 6047 6048 def _parse_session_parameter(self) -> exp.SessionParameter: 6049 kind = None 6050 this = self._parse_id_var() or self._parse_primary() 6051 6052 if this and self._match(TokenType.DOT): 6053 kind = this.name 6054 this = self._parse_var() or self._parse_primary() 6055 6056 return self.expression(exp.SessionParameter, this=this, kind=kind) 6057 6058 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6059 return self._parse_id_var() 6060 6061 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6062 index = self._index 6063 6064 if self._match(TokenType.L_PAREN): 6065 expressions = t.cast( 6066 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6067 ) 6068 6069 if not self._match(TokenType.R_PAREN): 6070 self._retreat(index) 6071 else: 6072 expressions = [self._parse_lambda_arg()] 6073 6074 if self._match_set(self.LAMBDAS): 6075 return self.LAMBDAS[self._prev.token_type](self, expressions) 6076 6077 self._retreat(index) 6078 6079 this: t.Optional[exp.Expression] 6080 6081 if self._match(TokenType.DISTINCT): 6082 this = self.expression( 6083 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6084 ) 6085 else: 6086 this = self._parse_select_or_expression(alias=alias) 6087 6088 return self._parse_limit( 6089 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6090 ) 6091 6092 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6093 index = self._index 6094 if not self._match(TokenType.L_PAREN): 6095 return this 6096 6097 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6098 # expr can be of both types 6099 if self._match_set(self.SELECT_START_TOKENS): 6100 self._retreat(index) 6101 return this 6102 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6103 self._match_r_paren() 6104 return self.expression(exp.Schema, this=this, expressions=args) 6105 6106 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6107 return self._parse_column_def(self._parse_field(any_token=True)) 6108 6109 def _parse_column_def( 6110 self, this: t.Optional[exp.Expression], computed_column: bool = True 6111 ) -> t.Optional[exp.Expression]: 6112 # column defs are not really columns, they're identifiers 6113 if isinstance(this, exp.Column): 6114 this = this.this 6115 6116 if not computed_column: 6117 self._match(TokenType.ALIAS) 6118 6119 kind = self._parse_types(schema=True) 6120 6121 if self._match_text_seq("FOR", "ORDINALITY"): 6122 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6123 6124 constraints: t.List[exp.Expression] = [] 6125 6126 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6127 ("ALIAS", "MATERIALIZED") 6128 ): 6129 persisted = self._prev.text.upper() == "MATERIALIZED" 6130 constraint_kind = exp.ComputedColumnConstraint( 6131 this=self._parse_assignment(), 6132 persisted=persisted or self._match_text_seq("PERSISTED"), 6133 data_type=exp.Var(this="AUTO") 6134 if self._match_text_seq("AUTO") 6135 else self._parse_types(), 6136 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6137 ) 6138 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6139 elif ( 6140 kind 6141 and self._match(TokenType.ALIAS, advance=False) 6142 and ( 6143 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6144 or (self._next and self._next.token_type == TokenType.L_PAREN) 6145 ) 6146 ): 6147 self._advance() 6148 constraints.append( 6149 self.expression( 6150 exp.ColumnConstraint, 6151 kind=exp.ComputedColumnConstraint( 6152 this=self._parse_disjunction(), 6153 persisted=self._match_texts(("STORED", "VIRTUAL")) 6154 and self._prev.text.upper() == "STORED", 6155 ), 6156 ) 6157 ) 6158 6159 while True: 6160 constraint = self._parse_column_constraint() 6161 if not constraint: 6162 break 6163 constraints.append(constraint) 6164 6165 if not kind and not constraints: 6166 return this 6167 6168 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6169 6170 def _parse_auto_increment( 6171 self, 6172 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6173 start = None 6174 increment = None 6175 order = None 6176 6177 if self._match(TokenType.L_PAREN, advance=False): 6178 args = self._parse_wrapped_csv(self._parse_bitwise) 6179 start = seq_get(args, 0) 6180 increment = seq_get(args, 1) 6181 elif self._match_text_seq("START"): 6182 start = self._parse_bitwise() 6183 self._match_text_seq("INCREMENT") 6184 increment = self._parse_bitwise() 6185 if self._match_text_seq("ORDER"): 6186 order = True 6187 elif self._match_text_seq("NOORDER"): 6188 order = False 6189 6190 if start and increment: 6191 return exp.GeneratedAsIdentityColumnConstraint( 6192 start=start, increment=increment, this=False, order=order 6193 ) 6194 6195 return exp.AutoIncrementColumnConstraint() 6196 6197 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6198 if not self._match_text_seq("REFRESH"): 6199 self._retreat(self._index - 1) 6200 return None 6201 return self.expression(exp.AutoRefreshProperty, 
this=self._parse_var(upper=True)) 6202 6203 def _parse_compress(self) -> exp.CompressColumnConstraint: 6204 if self._match(TokenType.L_PAREN, advance=False): 6205 return self.expression( 6206 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6207 ) 6208 6209 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6210 6211 def _parse_generated_as_identity( 6212 self, 6213 ) -> ( 6214 exp.GeneratedAsIdentityColumnConstraint 6215 | exp.ComputedColumnConstraint 6216 | exp.GeneratedAsRowColumnConstraint 6217 ): 6218 if self._match_text_seq("BY", "DEFAULT"): 6219 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6220 this = self.expression( 6221 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6222 ) 6223 else: 6224 self._match_text_seq("ALWAYS") 6225 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6226 6227 self._match(TokenType.ALIAS) 6228 6229 if self._match_text_seq("ROW"): 6230 start = self._match_text_seq("START") 6231 if not start: 6232 self._match(TokenType.END) 6233 hidden = self._match_text_seq("HIDDEN") 6234 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6235 6236 identity = self._match_text_seq("IDENTITY") 6237 6238 if self._match(TokenType.L_PAREN): 6239 if self._match(TokenType.START_WITH): 6240 this.set("start", self._parse_bitwise()) 6241 if self._match_text_seq("INCREMENT", "BY"): 6242 this.set("increment", self._parse_bitwise()) 6243 if self._match_text_seq("MINVALUE"): 6244 this.set("minvalue", self._parse_bitwise()) 6245 if self._match_text_seq("MAXVALUE"): 6246 this.set("maxvalue", self._parse_bitwise()) 6247 6248 if self._match_text_seq("CYCLE"): 6249 this.set("cycle", True) 6250 elif self._match_text_seq("NO", "CYCLE"): 6251 this.set("cycle", False) 6252 6253 if not identity: 6254 this.set("expression", self._parse_range()) 6255 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6256 args = self._parse_csv(self._parse_bitwise) 6257 this.set("start", seq_get(args, 0)) 6258 this.set("increment", seq_get(args, 1)) 6259 6260 self._match_r_paren() 6261 6262 return this 6263 6264 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6265 self._match_text_seq("LENGTH") 6266 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6267 6268 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6269 if self._match_text_seq("NULL"): 6270 return self.expression(exp.NotNullColumnConstraint) 6271 if self._match_text_seq("CASESPECIFIC"): 6272 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6273 if self._match_text_seq("FOR", "REPLICATION"): 6274 return self.expression(exp.NotForReplicationColumnConstraint) 6275 6276 # Unconsume the `NOT` token 6277 self._retreat(self._index - 1) 6278 return None 6279 6280 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6281 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6282 6283 procedure_option_follows = ( 6284 self._match(TokenType.WITH, advance=False) 6285 and self._next 6286 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6287 ) 6288 6289 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6290 return self.expression( 6291 exp.ColumnConstraint, 6292 this=this, 6293 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6294 ) 6295 6296 return this 6297 6298 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6299 if not 
self._match(TokenType.CONSTRAINT): 6300 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6301 6302 return self.expression( 6303 exp.Constraint, 6304 this=self._parse_id_var(), 6305 expressions=self._parse_unnamed_constraints(), 6306 ) 6307 6308 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6309 constraints = [] 6310 while True: 6311 constraint = self._parse_unnamed_constraint() or self._parse_function() 6312 if not constraint: 6313 break 6314 constraints.append(constraint) 6315 6316 return constraints 6317 6318 def _parse_unnamed_constraint( 6319 self, constraints: t.Optional[t.Collection[str]] = None 6320 ) -> t.Optional[exp.Expression]: 6321 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6322 constraints or self.CONSTRAINT_PARSERS 6323 ): 6324 return None 6325 6326 constraint = self._prev.text.upper() 6327 if constraint not in self.CONSTRAINT_PARSERS: 6328 self.raise_error(f"No parser found for schema constraint {constraint}.") 6329 6330 return self.CONSTRAINT_PARSERS[constraint](self) 6331 6332 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6333 return self._parse_id_var(any_token=False) 6334 6335 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6336 self._match_texts(("KEY", "INDEX")) 6337 return self.expression( 6338 exp.UniqueColumnConstraint, 6339 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6340 this=self._parse_schema(self._parse_unique_key()), 6341 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6342 on_conflict=self._parse_on_conflict(), 6343 options=self._parse_key_constraint_options(), 6344 ) 6345 6346 def _parse_key_constraint_options(self) -> t.List[str]: 6347 options = [] 6348 while True: 6349 if not self._curr: 6350 break 6351 6352 if self._match(TokenType.ON): 6353 action = None 6354 on = self._advance_any() and self._prev.text 6355 6356 if self._match_text_seq("NO", "ACTION"): 6357 action = "NO ACTION" 6358 elif self._match_text_seq("CASCADE"): 6359 action = "CASCADE" 6360 elif self._match_text_seq("RESTRICT"): 6361 action = "RESTRICT" 6362 elif self._match_pair(TokenType.SET, TokenType.NULL): 6363 action = "SET NULL" 6364 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6365 action = "SET DEFAULT" 6366 else: 6367 self.raise_error("Invalid key constraint") 6368 6369 options.append(f"ON {on} {action}") 6370 else: 6371 var = self._parse_var_from_options( 6372 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6373 ) 6374 if not var: 6375 break 6376 options.append(var.name) 6377 6378 return options 6379 6380 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6381 if match and not self._match(TokenType.REFERENCES): 6382 return None 6383 6384 expressions = None 6385 this = self._parse_table(schema=True) 6386 options = self._parse_key_constraint_options() 6387 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6388 6389 def _parse_foreign_key(self) -> exp.ForeignKey: 6390 expressions = ( 6391 self._parse_wrapped_id_vars() 6392 if not self._match(TokenType.REFERENCES, advance=False) 6393 else None 6394 ) 6395 reference = self._parse_references() 6396 on_options = {} 6397 6398 while self._match(TokenType.ON): 6399 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6400 self.raise_error("Expected DELETE or UPDATE") 6401 6402 kind = self._prev.text.lower() 6403 6404 if self._match_text_seq("NO", "ACTION"): 6405 action = "NO ACTION" 6406 elif 
self._match(TokenType.SET):
6407 self._match_set((TokenType.NULL, TokenType.DEFAULT))
6408 action = "SET " + self._prev.text.upper()
6409 else:
6410 self._advance()
6411 action = self._prev.text.upper()
6412
6413 on_options[kind] = action
6414
6415 return self.expression(
6416 exp.ForeignKey,
6417 expressions=expressions,
6418 reference=reference,
6419 options=self._parse_key_constraint_options(),
6420 **on_options,  # type: ignore
6421 )
6422
6423 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
6424 return self._parse_ordered() or self._parse_field()
6425
6426 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
6427 if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
6428 self._retreat(self._index - 1)
6429 return None
6430
6431 id_vars = self._parse_wrapped_id_vars()
6432 return self.expression(
6433 exp.PeriodForSystemTimeConstraint,
6434 this=seq_get(id_vars, 0),
6435 expression=seq_get(id_vars, 1),
6436 )
6437
6438 def _parse_primary_key(
6439 self, wrapped_optional: bool = False, in_props: bool = False
6440 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
6441 desc = (
6442 self._match_set((TokenType.ASC, TokenType.DESC))
6443 and self._prev.token_type == TokenType.DESC
6444 )
6445
6446 if not in_props and not self._match(TokenType.L_PAREN, advance=False):
6447 return self.expression(
6448 exp.PrimaryKeyColumnConstraint,
6449 desc=desc,
6450 options=self._parse_key_constraint_options(),
6451 )
6452
6453 expressions = self._parse_wrapped_csv(
6454 self._parse_primary_key_part, optional=wrapped_optional
6455 )
6456
6457 return self.expression(
6458 exp.PrimaryKey,
6459 expressions=expressions,
6460 include=self._parse_index_params(),
6461 options=self._parse_key_constraint_options(),
6462 )
6463
6464 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
6465 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
6466
6467 def _parse_odbc_datetime_literal(self) -> exp.Expression:
6468 """
6469 Parses a datetime column in ODBC format. We parse the column into the corresponding
6470 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly
6471 as if the input had been `DATE('yyyy-mm-dd')`.
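Likewise (illustrative, per the ODBC_DATETIME_LITERALS mapping used below),
`{t'hh:mm:ss'}` yields a `Time` and `{ts'yyyy-mm-dd hh:mm:ss'}` a `Timestamp` expression.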
6472 6473 Reference: 6474 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6475 """ 6476 self._match(TokenType.VAR) 6477 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6478 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6479 if not self._match(TokenType.R_BRACE): 6480 self.raise_error("Expected }") 6481 return expression 6482 6483 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6484 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6485 return this 6486 6487 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6488 map_token = seq_get(self._tokens, self._index - 2) 6489 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6490 else: 6491 parse_map = False 6492 6493 bracket_kind = self._prev.token_type 6494 if ( 6495 bracket_kind == TokenType.L_BRACE 6496 and self._curr 6497 and self._curr.token_type == TokenType.VAR 6498 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6499 ): 6500 return self._parse_odbc_datetime_literal() 6501 6502 expressions = self._parse_csv( 6503 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6504 ) 6505 6506 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6507 self.raise_error("Expected ]") 6508 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6509 self.raise_error("Expected }") 6510 6511 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6512 if bracket_kind == TokenType.L_BRACE: 6513 this = self.expression( 6514 exp.Struct, 6515 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6516 ) 6517 elif not this: 6518 this = build_array_constructor( 6519 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6520 ) 6521 else: 6522 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6523 if constructor_type: 6524 return build_array_constructor( 6525 constructor_type, 6526 args=expressions, 6527 bracket_kind=bracket_kind, 6528 dialect=self.dialect, 6529 ) 6530 6531 expressions = apply_index_offset( 6532 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6533 ) 6534 this = self.expression( 6535 exp.Bracket, 6536 this=this, 6537 expressions=expressions, 6538 comments=this.pop_comments(), 6539 ) 6540 6541 self._add_comments(this) 6542 return self._parse_bracket(this) 6543 6544 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6545 if self._match(TokenType.COLON): 6546 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6547 return this 6548 6549 def _parse_case(self) -> t.Optional[exp.Expression]: 6550 if self._match(TokenType.DOT, advance=False): 6551 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6552 self._retreat(self._index - 1) 6553 return None 6554 6555 ifs = [] 6556 default = None 6557 6558 comments = self._prev_comments 6559 expression = self._parse_assignment() 6560 6561 while self._match(TokenType.WHEN): 6562 this = self._parse_assignment() 6563 self._match(TokenType.THEN) 6564 then = self._parse_assignment() 6565 ifs.append(self.expression(exp.If, this=this, true=then)) 6566 6567 if self._match(TokenType.ELSE): 6568 default = self._parse_assignment() 6569 6570 if not self._match(TokenType.END): 6571 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6572 default 
= exp.column("interval") 6573 else: 6574 self.raise_error("Expected END after CASE", self._prev) 6575 6576 return self.expression( 6577 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6578 ) 6579 6580 def _parse_if(self) -> t.Optional[exp.Expression]: 6581 if self._match(TokenType.L_PAREN): 6582 args = self._parse_csv( 6583 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6584 ) 6585 this = self.validate_expression(exp.If.from_arg_list(args), args) 6586 self._match_r_paren() 6587 else: 6588 index = self._index - 1 6589 6590 if self.NO_PAREN_IF_COMMANDS and index == 0: 6591 return self._parse_as_command(self._prev) 6592 6593 condition = self._parse_assignment() 6594 6595 if not condition: 6596 self._retreat(index) 6597 return None 6598 6599 self._match(TokenType.THEN) 6600 true = self._parse_assignment() 6601 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6602 self._match(TokenType.END) 6603 this = self.expression(exp.If, this=condition, true=true, false=false) 6604 6605 return this 6606 6607 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6608 if not self._match_text_seq("VALUE", "FOR"): 6609 self._retreat(self._index - 1) 6610 return None 6611 6612 return self.expression( 6613 exp.NextValueFor, 6614 this=self._parse_column(), 6615 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6616 ) 6617 6618 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6619 this = self._parse_function() or self._parse_var_or_string(upper=True) 6620 6621 if self._match(TokenType.FROM): 6622 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6623 6624 if not self._match(TokenType.COMMA): 6625 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6626 6627 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6628 6629 def _parse_gap_fill(self) -> exp.GapFill: 6630 self._match(TokenType.TABLE) 6631 this = self._parse_table() 6632 6633 self._match(TokenType.COMMA) 6634 args = [this, *self._parse_csv(self._parse_lambda)] 6635 6636 gap_fill = exp.GapFill.from_arg_list(args) 6637 return self.validate_expression(gap_fill, args) 6638 6639 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6640 this = self._parse_assignment() 6641 6642 if not self._match(TokenType.ALIAS): 6643 if self._match(TokenType.COMMA): 6644 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6645 6646 self.raise_error("Expected AS after CAST") 6647 6648 fmt = None 6649 to = self._parse_types() 6650 6651 default = self._match(TokenType.DEFAULT) 6652 if default: 6653 default = self._parse_bitwise() 6654 self._match_text_seq("ON", "CONVERSION", "ERROR") 6655 6656 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6657 fmt_string = self._parse_string() 6658 fmt = self._parse_at_time_zone(fmt_string) 6659 6660 if not to: 6661 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6662 if to.this in exp.DataType.TEMPORAL_TYPES: 6663 this = self.expression( 6664 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6665 this=this, 6666 format=exp.Literal.string( 6667 format_time( 6668 fmt_string.this if fmt_string else "", 6669 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6670 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6671 ) 6672 ), 6673 safe=safe, 6674 ) 6675 6676 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6677 this.set("zone", 
fmt.args["zone"]) 6678 return this 6679 elif not to: 6680 self.raise_error("Expected TYPE after CAST") 6681 elif isinstance(to, exp.Identifier): 6682 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6683 elif to.this == exp.DataType.Type.CHAR: 6684 if self._match(TokenType.CHARACTER_SET): 6685 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6686 6687 return self.build_cast( 6688 strict=strict, 6689 this=this, 6690 to=to, 6691 format=fmt, 6692 safe=safe, 6693 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6694 default=default, 6695 ) 6696 6697 def _parse_string_agg(self) -> exp.GroupConcat: 6698 if self._match(TokenType.DISTINCT): 6699 args: t.List[t.Optional[exp.Expression]] = [ 6700 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6701 ] 6702 if self._match(TokenType.COMMA): 6703 args.extend(self._parse_csv(self._parse_assignment)) 6704 else: 6705 args = self._parse_csv(self._parse_assignment) # type: ignore 6706 6707 if self._match_text_seq("ON", "OVERFLOW"): 6708 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6709 if self._match_text_seq("ERROR"): 6710 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6711 else: 6712 self._match_text_seq("TRUNCATE") 6713 on_overflow = self.expression( 6714 exp.OverflowTruncateBehavior, 6715 this=self._parse_string(), 6716 with_count=( 6717 self._match_text_seq("WITH", "COUNT") 6718 or not self._match_text_seq("WITHOUT", "COUNT") 6719 ), 6720 ) 6721 else: 6722 on_overflow = None 6723 6724 index = self._index 6725 if not self._match(TokenType.R_PAREN) and args: 6726 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6727 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6728 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6729 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6730 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6731 6732 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6733 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6734 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
6735 if not self._match_text_seq("WITHIN", "GROUP"): 6736 self._retreat(index) 6737 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6738 6739 # The corresponding match_r_paren will be called in parse_function (caller) 6740 self._match_l_paren() 6741 6742 return self.expression( 6743 exp.GroupConcat, 6744 this=self._parse_order(this=seq_get(args, 0)), 6745 separator=seq_get(args, 1), 6746 on_overflow=on_overflow, 6747 ) 6748 6749 def _parse_convert( 6750 self, strict: bool, safe: t.Optional[bool] = None 6751 ) -> t.Optional[exp.Expression]: 6752 this = self._parse_bitwise() 6753 6754 if self._match(TokenType.USING): 6755 to: t.Optional[exp.Expression] = self.expression( 6756 exp.CharacterSet, this=self._parse_var() 6757 ) 6758 elif self._match(TokenType.COMMA): 6759 to = self._parse_types() 6760 else: 6761 to = None 6762 6763 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6764 6765 def _parse_xml_table(self) -> exp.XMLTable: 6766 namespaces = None 6767 passing = None 6768 columns = None 6769 6770 if self._match_text_seq("XMLNAMESPACES", "("): 6771 namespaces = self._parse_xml_namespace() 6772 self._match_text_seq(")", ",") 6773 6774 this = self._parse_string() 6775 6776 if self._match_text_seq("PASSING"): 6777 # The BY VALUE keywords are optional and are provided for semantic clarity 6778 self._match_text_seq("BY", "VALUE") 6779 passing = self._parse_csv(self._parse_column) 6780 6781 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6782 6783 if self._match_text_seq("COLUMNS"): 6784 columns = self._parse_csv(self._parse_field_def) 6785 6786 return self.expression( 6787 exp.XMLTable, 6788 this=this, 6789 namespaces=namespaces, 6790 passing=passing, 6791 columns=columns, 6792 by_ref=by_ref, 6793 ) 6794 6795 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6796 namespaces = [] 6797 6798 while True: 6799 if self._match(TokenType.DEFAULT): 6800 uri = self._parse_string() 6801 else: 6802 uri = self._parse_alias(self._parse_string()) 6803 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6804 if not self._match(TokenType.COMMA): 6805 break 6806 6807 return namespaces 6808 6809 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6810 args = self._parse_csv(self._parse_assignment) 6811 6812 if len(args) < 3: 6813 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6814 6815 return self.expression(exp.DecodeCase, expressions=args) 6816 6817 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6818 self._match_text_seq("KEY") 6819 key = self._parse_column() 6820 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6821 self._match_text_seq("VALUE") 6822 value = self._parse_bitwise() 6823 6824 if not key and not value: 6825 return None 6826 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6827 6828 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6829 if not this or not self._match_text_seq("FORMAT", "JSON"): 6830 return this 6831 6832 return self.expression(exp.FormatJson, this=this) 6833 6834 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6835 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6836 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6837 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6838 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6839 else: 6840 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6841 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6842 6843 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6844 6845 if not empty and not error and not null: 6846 return None 6847 6848 return self.expression( 6849 exp.OnCondition, 6850 empty=empty, 6851 error=error, 6852 null=null, 6853 ) 6854 6855 def _parse_on_handling( 6856 self, on: str, *values: str 6857 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6858 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6859 for value in values: 6860 if self._match_text_seq(value, "ON", on): 6861 return f"{value} ON {on}" 6862 6863 index = self._index 6864 if self._match(TokenType.DEFAULT): 6865 default_value = self._parse_bitwise() 6866 if self._match_text_seq("ON", on): 6867 return default_value 6868 6869 self._retreat(index) 6870 6871 return None 6872 6873 @t.overload 6874 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6875 6876 @t.overload 6877 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6878 6879 def _parse_json_object(self, agg=False): 6880 star = self._parse_star() 6881 expressions = ( 6882 [star] 6883 if star 6884 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6885 ) 6886 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6887 6888 unique_keys = None 6889 if self._match_text_seq("WITH", "UNIQUE"): 6890 unique_keys = True 6891 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6892 unique_keys = False 6893 6894 self._match_text_seq("KEYS") 6895 6896 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6897 self._parse_type() 6898 ) 6899 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6900 6901 return self.expression( 6902 exp.JSONObjectAgg if agg else exp.JSONObject, 6903 expressions=expressions, 6904 null_handling=null_handling, 6905 unique_keys=unique_keys, 6906 return_type=return_type, 6907 encoding=encoding, 6908 ) 6909 6910 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6911 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6912 if not self._match_text_seq("NESTED"): 6913 this = self._parse_id_var() 6914 kind = self._parse_types(allow_identifiers=False) 6915 nested = None 6916 else: 6917 this = None 6918 kind = None 6919 nested = True 6920 6921 path = self._match_text_seq("PATH") and self._parse_string() 6922 nested_schema = nested and self._parse_json_schema() 6923 6924 return self.expression( 6925 exp.JSONColumnDef, 6926 this=this, 6927 kind=kind, 6928 path=path, 6929 nested_schema=nested_schema, 6930 ) 6931 6932 def _parse_json_schema(self) -> exp.JSONSchema: 6933 self._match_text_seq("COLUMNS") 6934 return self.expression( 6935 exp.JSONSchema, 6936 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6937 ) 6938 6939 def _parse_json_table(self) -> exp.JSONTable: 6940 this = self._parse_format_json(self._parse_bitwise()) 6941 path = self._match(TokenType.COMMA) and self._parse_string() 6942 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6943 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6944 schema = 
self._parse_json_schema() 6945 6946 return exp.JSONTable( 6947 this=this, 6948 schema=schema, 6949 path=path, 6950 error_handling=error_handling, 6951 empty_handling=empty_handling, 6952 ) 6953 6954 def _parse_match_against(self) -> exp.MatchAgainst: 6955 if self._match_text_seq("TABLE"): 6956 # parse SingleStore MATCH(TABLE ...) syntax 6957 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6958 expressions = [] 6959 table = self._parse_table() 6960 if table: 6961 expressions = [table] 6962 else: 6963 expressions = self._parse_csv(self._parse_column) 6964 6965 self._match_text_seq(")", "AGAINST", "(") 6966 6967 this = self._parse_string() 6968 6969 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6970 modifier = "IN NATURAL LANGUAGE MODE" 6971 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6972 modifier = f"{modifier} WITH QUERY EXPANSION" 6973 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6974 modifier = "IN BOOLEAN MODE" 6975 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6976 modifier = "WITH QUERY EXPANSION" 6977 else: 6978 modifier = None 6979 6980 return self.expression( 6981 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6982 ) 6983 6984 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6985 def _parse_open_json(self) -> exp.OpenJSON: 6986 this = self._parse_bitwise() 6987 path = self._match(TokenType.COMMA) and self._parse_string() 6988 6989 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6990 this = self._parse_field(any_token=True) 6991 kind = self._parse_types() 6992 path = self._parse_string() 6993 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6994 6995 return self.expression( 6996 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6997 ) 6998 6999 expressions = None 7000 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7001 self._match_l_paren() 7002 expressions = self._parse_csv(_parse_open_json_column_def) 7003 7004 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 7005 7006 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7007 args = self._parse_csv(self._parse_bitwise) 7008 7009 if self._match(TokenType.IN): 7010 return self.expression( 7011 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7012 ) 7013 7014 if haystack_first: 7015 haystack = seq_get(args, 0) 7016 needle = seq_get(args, 1) 7017 else: 7018 haystack = seq_get(args, 1) 7019 needle = seq_get(args, 0) 7020 7021 return self.expression( 7022 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7023 ) 7024 7025 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7026 args = self._parse_csv(self._parse_table) 7027 return exp.JoinHint(this=func_name.upper(), expressions=args) 7028 7029 def _parse_substring(self) -> exp.Substring: 7030 # Postgres supports the form: substring(string [from int] [for int]) 7031 # (despite being undocumented, the reverse order also works) 7032 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7033 7034 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7035 7036 start, length = None, None 7037 7038 while self._curr: 7039 if self._match(TokenType.FROM): 7040 start = self._parse_bitwise() 7041 elif self._match(TokenType.FOR): 7042 if not start: 7043 start = exp.Literal.number(1) 7044 length = self._parse_bitwise() 7045 
else: 7046 break 7047 7048 if start: 7049 args.append(start) 7050 if length: 7051 args.append(length) 7052 7053 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7054 7055 def _parse_trim(self) -> exp.Trim: 7056 # https://www.w3resource.com/sql/character-functions/trim.php 7057 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7058 7059 position = None 7060 collation = None 7061 expression = None 7062 7063 if self._match_texts(self.TRIM_TYPES): 7064 position = self._prev.text.upper() 7065 7066 this = self._parse_bitwise() 7067 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7068 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7069 expression = self._parse_bitwise() 7070 7071 if invert_order: 7072 this, expression = expression, this 7073 7074 if self._match(TokenType.COLLATE): 7075 collation = self._parse_bitwise() 7076 7077 return self.expression( 7078 exp.Trim, this=this, position=position, expression=expression, collation=collation 7079 ) 7080 7081 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7082 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7083 7084 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7085 return self._parse_window(self._parse_id_var(), alias=True) 7086 7087 def _parse_respect_or_ignore_nulls( 7088 self, this: t.Optional[exp.Expression] 7089 ) -> t.Optional[exp.Expression]: 7090 if self._match_text_seq("IGNORE", "NULLS"): 7091 return self.expression(exp.IgnoreNulls, this=this) 7092 if self._match_text_seq("RESPECT", "NULLS"): 7093 return self.expression(exp.RespectNulls, this=this) 7094 return this 7095 7096 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7097 if self._match(TokenType.HAVING): 7098 self._match_texts(("MAX", "MIN")) 7099 max = self._prev.text.upper() != "MIN" 7100 return self.expression( 7101 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7102 ) 7103 7104 return this 7105 7106 def _parse_window( 7107 self, this: t.Optional[exp.Expression], alias: bool = False 7108 ) -> t.Optional[exp.Expression]: 7109 func = this 7110 comments = func.comments if isinstance(func, exp.Expression) else None 7111 7112 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7113 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7114 if self._match_text_seq("WITHIN", "GROUP"): 7115 order = self._parse_wrapped(self._parse_order) 7116 this = self.expression(exp.WithinGroup, this=this, expression=order) 7117 7118 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7119 self._match(TokenType.WHERE) 7120 this = self.expression( 7121 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7122 ) 7123 self._match_r_paren() 7124 7125 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7126 # Some dialects choose to implement and some do not. 7127 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7128 7129 # There is some code above in _parse_lambda that handles 7130 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7131 7132 # The below changes handle 7133 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
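        # Editor's sketch (hedged): in the default dialect both placements should
        # parse, and both should end up with the function wrapped in an
        # exp.IgnoreNulls node, assuming sqlglot.parse_one:
        #
        #     import sqlglot
        #     from sqlglot import exp
        #
        #     inside = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER () FROM t")
        #     outside = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER () FROM t")
        #     assert inside.find(exp.IgnoreNulls) is not None
        #     assert outside.find(exp.IgnoreNulls) is not None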
7134 7135 # Oracle allows both formats 7136 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7137 # and Snowflake chose to do the same for familiarity 7138 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7139 if isinstance(this, exp.AggFunc): 7140 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7141 7142 if ignore_respect and ignore_respect is not this: 7143 ignore_respect.replace(ignore_respect.this) 7144 this = self.expression(ignore_respect.__class__, this=this) 7145 7146 this = self._parse_respect_or_ignore_nulls(this) 7147 7148 # bigquery select from window x AS (partition by ...) 7149 if alias: 7150 over = None 7151 self._match(TokenType.ALIAS) 7152 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7153 return this 7154 else: 7155 over = self._prev.text.upper() 7156 7157 if comments and isinstance(func, exp.Expression): 7158 func.pop_comments() 7159 7160 if not self._match(TokenType.L_PAREN): 7161 return self.expression( 7162 exp.Window, 7163 comments=comments, 7164 this=this, 7165 alias=self._parse_id_var(False), 7166 over=over, 7167 ) 7168 7169 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7170 7171 first = self._match(TokenType.FIRST) 7172 if self._match_text_seq("LAST"): 7173 first = False 7174 7175 partition, order = self._parse_partition_and_order() 7176 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7177 7178 if kind: 7179 self._match(TokenType.BETWEEN) 7180 start = self._parse_window_spec() 7181 7182 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7183 exclude = ( 7184 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7185 if self._match_text_seq("EXCLUDE") 7186 else None 7187 ) 7188 7189 spec = self.expression( 7190 exp.WindowSpec, 7191 kind=kind, 7192 start=start["value"], 7193 start_side=start["side"], 7194 end=end.get("value"), 7195 end_side=end.get("side"), 7196 exclude=exclude, 7197 ) 7198 else: 7199 spec = None 7200 7201 self._match_r_paren() 7202 7203 window = self.expression( 7204 exp.Window, 7205 comments=comments, 7206 this=this, 7207 partition_by=partition, 7208 order=order, 7209 spec=spec, 7210 alias=window_alias, 7211 over=over, 7212 first=first, 7213 ) 7214 7215 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
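        # Editor's sketch (hedged; KEEP is Oracle syntax, so read="oracle"): a query
        # such as
        #
        #     SELECT MAX(x) KEEP (DENSE_RANK FIRST ORDER BY y) OVER (PARTITION BY z) FROM t
        #
        # should parse into two nested exp.Window nodes, the inner one for the
        # KEEP (...) clause and the outer one for OVER (...), produced by the
        # recursive _parse_window call right below.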
7216 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7217 return self._parse_window(window, alias=alias) 7218 7219 return window 7220 7221 def _parse_partition_and_order( 7222 self, 7223 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7224 return self._parse_partition_by(), self._parse_order() 7225 7226 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7227 self._match(TokenType.BETWEEN) 7228 7229 return { 7230 "value": ( 7231 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7232 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7233 or self._parse_type() 7234 ), 7235 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7236 } 7237 7238 def _parse_alias( 7239 self, this: t.Optional[exp.Expression], explicit: bool = False 7240 ) -> t.Optional[exp.Expression]: 7241 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7242 # so this section tries to parse the clause version and if it fails, it treats the token 7243 # as an identifier (alias) 7244 if self._can_parse_limit_or_offset(): 7245 return this 7246 7247 any_token = self._match(TokenType.ALIAS) 7248 comments = self._prev_comments or [] 7249 7250 if explicit and not any_token: 7251 return this 7252 7253 if self._match(TokenType.L_PAREN): 7254 aliases = self.expression( 7255 exp.Aliases, 7256 comments=comments, 7257 this=this, 7258 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7259 ) 7260 self._match_r_paren(aliases) 7261 return aliases 7262 7263 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7264 self.STRING_ALIASES and self._parse_string_as_identifier() 7265 ) 7266 7267 if alias: 7268 comments.extend(alias.pop_comments()) 7269 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7270 column = this.this 7271 7272 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7273 if not this.comments and column and column.comments: 7274 this.comments = column.pop_comments() 7275 7276 return this 7277 7278 def _parse_id_var( 7279 self, 7280 any_token: bool = True, 7281 tokens: t.Optional[t.Collection[TokenType]] = None, 7282 ) -> t.Optional[exp.Expression]: 7283 expression = self._parse_identifier() 7284 if not expression and ( 7285 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7286 ): 7287 quoted = self._prev.token_type == TokenType.STRING 7288 expression = self._identifier_expression(quoted=quoted) 7289 7290 return expression 7291 7292 def _parse_string(self) -> t.Optional[exp.Expression]: 7293 if self._match_set(self.STRING_PARSERS): 7294 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7295 return self._parse_placeholder() 7296 7297 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7298 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7299 if output: 7300 output.update_positions(self._prev) 7301 return output 7302 7303 def _parse_number(self) -> t.Optional[exp.Expression]: 7304 if self._match_set(self.NUMERIC_PARSERS): 7305 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7306 return self._parse_placeholder() 7307 7308 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7309 if self._match(TokenType.IDENTIFIER): 7310 return self._identifier_expression(quoted=True) 7311 return self._parse_placeholder() 7312 7313 def _parse_var( 7314 self, 7315 any_token: bool = False, 7316 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7317 upper: bool = False, 7318 ) -> t.Optional[exp.Expression]: 7319 if ( 7320 (any_token and self._advance_any()) 7321 or self._match(TokenType.VAR) 7322 or (self._match_set(tokens) if tokens else False) 7323 ): 7324 return self.expression( 7325 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7326 ) 7327 return self._parse_placeholder() 7328 7329 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7330 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7331 self._advance() 7332 return self._prev 7333 return None 7334 7335 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7336 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7337 7338 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7339 return self._parse_primary() or self._parse_var(any_token=True) 7340 7341 def _parse_null(self) -> t.Optional[exp.Expression]: 7342 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7343 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7344 return self._parse_placeholder() 7345 7346 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7347 if self._match(TokenType.TRUE): 7348 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7349 if self._match(TokenType.FALSE): 7350 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7351 return self._parse_placeholder() 7352 7353 def _parse_star(self) -> t.Optional[exp.Expression]: 7354 if self._match(TokenType.STAR): 7355 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7356 return self._parse_placeholder() 7357 7358 def _parse_parameter(self) -> exp.Parameter: 7359 this = self._parse_identifier() or self._parse_primary_or_var() 7360 return self.expression(exp.Parameter, this=this) 7361 7362 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7363 if self._match_set(self.PLACEHOLDER_PARSERS): 7364 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7365 if placeholder: 7366 return placeholder 7367 self._advance(-1) 7368 return None 7369 7370 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7371 if not self._match_texts(keywords): 7372 return None 7373 if self._match(TokenType.L_PAREN, advance=False): 7374 return self._parse_wrapped_csv(self._parse_expression) 7375 7376 expression = self._parse_alias(self._parse_assignment(), explicit=True) 7377 return [expression] if expression else None 7378 7379 def _parse_csv( 7380 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7381 ) -> t.List[exp.Expression]: 7382 parse_result = parse_method() 7383 items = [parse_result] if parse_result is not None else [] 7384 7385 while self._match(sep): 7386 self._add_comments(parse_result) 7387 parse_result = parse_method() 7388 if parse_result is not None: 7389 items.append(parse_result) 7390 7391 return items 7392 7393 def _parse_tokens( 7394 self, parse_method: t.Callable, expressions: t.Dict 7395 ) -> t.Optional[exp.Expression]: 7396 this = parse_method() 7397 7398 while self._match_set(expressions): 7399 this = self.expression( 7400 expressions[self._prev.token_type], 7401 this=this, 7402 comments=self._prev_comments, 7403 expression=parse_method(), 7404 ) 7405 7406 return this 7407 7408 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7409 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7410 7411 def 
_parse_wrapped_csv( 7412 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7413 ) -> t.List[exp.Expression]: 7414 return self._parse_wrapped( 7415 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7416 ) 7417 7418 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7419 wrapped = self._match(TokenType.L_PAREN) 7420 if not wrapped and not optional: 7421 self.raise_error("Expecting (") 7422 parse_result = parse_method() 7423 if wrapped: 7424 self._match_r_paren() 7425 return parse_result 7426 7427 def _parse_expressions(self) -> t.List[exp.Expression]: 7428 return self._parse_csv(self._parse_expression) 7429 7430 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7431 return ( 7432 self._parse_set_operations( 7433 self._parse_alias(self._parse_assignment(), explicit=True) 7434 if alias 7435 else self._parse_assignment() 7436 ) 7437 or self._parse_select() 7438 ) 7439 7440 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7441 return self._parse_query_modifiers( 7442 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7443 ) 7444 7445 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7446 this = None 7447 if self._match_texts(self.TRANSACTION_KIND): 7448 this = self._prev.text 7449 7450 self._match_texts(("TRANSACTION", "WORK")) 7451 7452 modes = [] 7453 while True: 7454 mode = [] 7455 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7456 mode.append(self._prev.text) 7457 7458 if mode: 7459 modes.append(" ".join(mode)) 7460 if not self._match(TokenType.COMMA): 7461 break 7462 7463 return self.expression(exp.Transaction, this=this, modes=modes) 7464 7465 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7466 chain = None 7467 savepoint = None 7468 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7469 7470 self._match_texts(("TRANSACTION", "WORK")) 7471 7472 if self._match_text_seq("TO"): 7473 self._match_text_seq("SAVEPOINT") 7474 savepoint = self._parse_id_var() 7475 7476 if self._match(TokenType.AND): 7477 chain = not self._match_text_seq("NO") 7478 self._match_text_seq("CHAIN") 7479 7480 if is_rollback: 7481 return self.expression(exp.Rollback, savepoint=savepoint) 7482 7483 return self.expression(exp.Commit, chain=chain) 7484 7485 def _parse_refresh(self) -> exp.Refresh: 7486 self._match(TokenType.TABLE) 7487 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7488 7489 def _parse_column_def_with_exists(self): 7490 start = self._index 7491 self._match(TokenType.COLUMN) 7492 7493 exists_column = self._parse_exists(not_=True) 7494 expression = self._parse_field_def() 7495 7496 if not isinstance(expression, exp.ColumnDef): 7497 self._retreat(start) 7498 return None 7499 7500 expression.set("exists", exists_column) 7501 7502 return expression 7503 7504 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7505 if not self._prev.text.upper() == "ADD": 7506 return None 7507 7508 expression = self._parse_column_def_with_exists() 7509 if not expression: 7510 return None 7511 7512 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7513 if self._match_texts(("FIRST", "AFTER")): 7514 position = self._prev.text 7515 column_position = self.expression( 7516 exp.ColumnPosition, this=self._parse_column(), position=position 7517 ) 7518 expression.set("position", column_position) 7519 7520 return 
expression 7521 7522 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7523 drop = self._match(TokenType.DROP) and self._parse_drop() 7524 if drop and not isinstance(drop, exp.Command): 7525 drop.set("kind", drop.args.get("kind", "COLUMN")) 7526 return drop 7527 7528 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7529 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7530 return self.expression( 7531 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7532 ) 7533 7534 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7535 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7536 self._match_text_seq("ADD") 7537 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7538 return self.expression( 7539 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7540 ) 7541 7542 column_def = self._parse_add_column() 7543 if isinstance(column_def, exp.ColumnDef): 7544 return column_def 7545 7546 exists = self._parse_exists(not_=True) 7547 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7548 return self.expression( 7549 exp.AddPartition, 7550 exists=exists, 7551 this=self._parse_field(any_token=True), 7552 location=self._match_text_seq("LOCATION", advance=False) 7553 and self._parse_property(), 7554 ) 7555 7556 return None 7557 7558 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7559 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7560 or self._match_text_seq("COLUMNS") 7561 ): 7562 schema = self._parse_schema() 7563 7564 return ( 7565 ensure_list(schema) 7566 if schema 7567 else self._parse_csv(self._parse_column_def_with_exists) 7568 ) 7569 7570 return self._parse_csv(_parse_add_alteration) 7571 7572 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7573 if self._match_texts(self.ALTER_ALTER_PARSERS): 7574 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7575 7576 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7577 # keyword after ALTER we default to parsing this statement 7578 self._match(TokenType.COLUMN) 7579 column = self._parse_field(any_token=True) 7580 7581 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7582 return self.expression(exp.AlterColumn, this=column, drop=True) 7583 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7584 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7585 if self._match(TokenType.COMMENT): 7586 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7587 if self._match_text_seq("DROP", "NOT", "NULL"): 7588 return self.expression( 7589 exp.AlterColumn, 7590 this=column, 7591 drop=True, 7592 allow_null=True, 7593 ) 7594 if self._match_text_seq("SET", "NOT", "NULL"): 7595 return self.expression( 7596 exp.AlterColumn, 7597 this=column, 7598 allow_null=False, 7599 ) 7600 7601 if self._match_text_seq("SET", "VISIBLE"): 7602 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7603 if self._match_text_seq("SET", "INVISIBLE"): 7604 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7605 7606 self._match_text_seq("SET", "DATA") 7607 self._match_text_seq("TYPE") 7608 return self.expression( 7609 exp.AlterColumn, 7610 this=column, 7611 dtype=self._parse_types(), 7612 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7613 using=self._match(TokenType.USING) and 
self._parse_assignment(), 7614 ) 7615 7616 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7617 if self._match_texts(("ALL", "EVEN", "AUTO")): 7618 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7619 7620 self._match_text_seq("KEY", "DISTKEY") 7621 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7622 7623 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7624 if compound: 7625 self._match_text_seq("SORTKEY") 7626 7627 if self._match(TokenType.L_PAREN, advance=False): 7628 return self.expression( 7629 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7630 ) 7631 7632 self._match_texts(("AUTO", "NONE")) 7633 return self.expression( 7634 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7635 ) 7636 7637 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7638 index = self._index - 1 7639 7640 partition_exists = self._parse_exists() 7641 if self._match(TokenType.PARTITION, advance=False): 7642 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7643 7644 self._retreat(index) 7645 return self._parse_csv(self._parse_drop_column) 7646 7647 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7648 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7649 exists = self._parse_exists() 7650 old_column = self._parse_column() 7651 to = self._match_text_seq("TO") 7652 new_column = self._parse_column() 7653 7654 if old_column is None or to is None or new_column is None: 7655 return None 7656 7657 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7658 7659 self._match_text_seq("TO") 7660 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7661 7662 def _parse_alter_table_set(self) -> exp.AlterSet: 7663 alter_set = self.expression(exp.AlterSet) 7664 7665 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7666 "TABLE", "PROPERTIES" 7667 ): 7668 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7669 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7670 alter_set.set("expressions", [self._parse_assignment()]) 7671 elif self._match_texts(("LOGGED", "UNLOGGED")): 7672 alter_set.set("option", exp.var(self._prev.text.upper())) 7673 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7674 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7675 elif self._match_text_seq("LOCATION"): 7676 alter_set.set("location", self._parse_field()) 7677 elif self._match_text_seq("ACCESS", "METHOD"): 7678 alter_set.set("access_method", self._parse_field()) 7679 elif self._match_text_seq("TABLESPACE"): 7680 alter_set.set("tablespace", self._parse_field()) 7681 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7682 alter_set.set("file_format", [self._parse_field()]) 7683 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7684 alter_set.set("file_format", self._parse_wrapped_options()) 7685 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7686 alter_set.set("copy_options", self._parse_wrapped_options()) 7687 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7688 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7689 else: 7690 if self._match_text_seq("SERDE"): 7691 alter_set.set("serde", self._parse_field()) 7692 7693 properties = 
self._parse_wrapped(self._parse_properties, optional=True) 7694 alter_set.set("expressions", [properties]) 7695 7696 return alter_set 7697 7698 def _parse_alter_session(self) -> exp.AlterSession: 7699 """Parse ALTER SESSION SET/UNSET statements.""" 7700 if self._match(TokenType.SET): 7701 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7702 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7703 7704 self._match_text_seq("UNSET") 7705 expressions = self._parse_csv( 7706 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7707 ) 7708 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7709 7710 def _parse_alter(self) -> exp.Alter | exp.Command: 7711 start = self._prev 7712 7713 alter_token = self._match_set(self.ALTERABLES) and self._prev 7714 if not alter_token: 7715 return self._parse_as_command(start) 7716 7717 exists = self._parse_exists() 7718 only = self._match_text_seq("ONLY") 7719 7720 if alter_token.token_type == TokenType.SESSION: 7721 this = None 7722 check = None 7723 cluster = None 7724 else: 7725 this = self._parse_table(schema=True) 7726 check = self._match_text_seq("WITH", "CHECK") 7727 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7728 7729 if self._next: 7730 self._advance() 7731 7732 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7733 if parser: 7734 actions = ensure_list(parser(self)) 7735 not_valid = self._match_text_seq("NOT", "VALID") 7736 options = self._parse_csv(self._parse_property) 7737 7738 if not self._curr and actions: 7739 return self.expression( 7740 exp.Alter, 7741 this=this, 7742 kind=alter_token.text.upper(), 7743 exists=exists, 7744 actions=actions, 7745 only=only, 7746 options=options, 7747 cluster=cluster, 7748 not_valid=not_valid, 7749 check=check, 7750 ) 7751 7752 return self._parse_as_command(start) 7753 7754 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7755 start = self._prev 7756 # https://duckdb.org/docs/sql/statements/analyze 7757 if not self._curr: 7758 return self.expression(exp.Analyze) 7759 7760 options = [] 7761 while self._match_texts(self.ANALYZE_STYLES): 7762 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7763 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7764 else: 7765 options.append(self._prev.text.upper()) 7766 7767 this: t.Optional[exp.Expression] = None 7768 inner_expression: t.Optional[exp.Expression] = None 7769 7770 kind = self._curr and self._curr.text.upper() 7771 7772 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7773 this = self._parse_table_parts() 7774 elif self._match_text_seq("TABLES"): 7775 if self._match_set((TokenType.FROM, TokenType.IN)): 7776 kind = f"{kind} {self._prev.text.upper()}" 7777 this = self._parse_table(schema=True, is_db_reference=True) 7778 elif self._match_text_seq("DATABASE"): 7779 this = self._parse_table(schema=True, is_db_reference=True) 7780 elif self._match_text_seq("CLUSTER"): 7781 this = self._parse_table() 7782 # Try matching inner expr keywords before fallback to parse table. 
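        # Editor's sketch (hedged): e.g. the Spark-style statement
        #
        #     ANALYZE TABLE t COMPUTE STATISTICS
        #
        # takes the TABLE branch above and should then pick up COMPUTE through an
        # ANALYZE_EXPRESSION_PARSERS lookup, assuming sqlglot.parse_one:
        #
        #     import sqlglot
        #     from sqlglot import exp
        #
        #     node = sqlglot.parse_one("ANALYZE TABLE t COMPUTE STATISTICS", read="spark")
        #     assert isinstance(node, exp.Analyze)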
7783 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7784 kind = None 7785 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7786 else: 7787 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7788 kind = None 7789 this = self._parse_table_parts() 7790 7791 partition = self._try_parse(self._parse_partition) 7792 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7793 return self._parse_as_command(start) 7794 7795 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7796 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7797 "WITH", "ASYNC", "MODE" 7798 ): 7799 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7800 else: 7801 mode = None 7802 7803 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7804 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7805 7806 properties = self._parse_properties() 7807 return self.expression( 7808 exp.Analyze, 7809 kind=kind, 7810 this=this, 7811 mode=mode, 7812 partition=partition, 7813 properties=properties, 7814 expression=inner_expression, 7815 options=options, 7816 ) 7817 7818 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7819 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7820 this = None 7821 kind = self._prev.text.upper() 7822 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7823 expressions = [] 7824 7825 if not self._match_text_seq("STATISTICS"): 7826 self.raise_error("Expecting token STATISTICS") 7827 7828 if self._match_text_seq("NOSCAN"): 7829 this = "NOSCAN" 7830 elif self._match(TokenType.FOR): 7831 if self._match_text_seq("ALL", "COLUMNS"): 7832 this = "FOR ALL COLUMNS" 7833 if self._match_texts("COLUMNS"): 7834 this = "FOR COLUMNS" 7835 expressions = self._parse_csv(self._parse_column_reference) 7836 elif self._match_text_seq("SAMPLE"): 7837 sample = self._parse_number() 7838 expressions = [ 7839 self.expression( 7840 exp.AnalyzeSample, 7841 sample=sample, 7842 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7843 ) 7844 ] 7845 7846 return self.expression( 7847 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7848 ) 7849 7850 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7851 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7852 kind = None 7853 this = None 7854 expression: t.Optional[exp.Expression] = None 7855 if self._match_text_seq("REF", "UPDATE"): 7856 kind = "REF" 7857 this = "UPDATE" 7858 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7859 this = "UPDATE SET DANGLING TO NULL" 7860 elif self._match_text_seq("STRUCTURE"): 7861 kind = "STRUCTURE" 7862 if self._match_text_seq("CASCADE", "FAST"): 7863 this = "CASCADE FAST" 7864 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7865 ("ONLINE", "OFFLINE") 7866 ): 7867 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7868 expression = self._parse_into() 7869 7870 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7871 7872 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7873 this = self._prev.text.upper() 7874 if self._match_text_seq("COLUMNS"): 7875 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7876 return None 7877 7878 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7879 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7880 if self._match_text_seq("STATISTICS"): 7881 return self.expression(exp.AnalyzeDelete, kind=kind) 7882 return None 7883 7884 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7885 if self._match_text_seq("CHAINED", "ROWS"): 7886 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7887 return None 7888 7889 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7890 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7891 this = self._prev.text.upper() 7892 expression: t.Optional[exp.Expression] = None 7893 expressions = [] 7894 update_options = None 7895 7896 if self._match_text_seq("HISTOGRAM", "ON"): 7897 expressions = self._parse_csv(self._parse_column_reference) 7898 with_expressions = [] 7899 while self._match(TokenType.WITH): 7900 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7901 if self._match_texts(("SYNC", "ASYNC")): 7902 if self._match_text_seq("MODE", advance=False): 7903 with_expressions.append(f"{self._prev.text.upper()} MODE") 7904 self._advance() 7905 else: 7906 buckets = self._parse_number() 7907 if self._match_text_seq("BUCKETS"): 7908 with_expressions.append(f"{buckets} BUCKETS") 7909 if with_expressions: 7910 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7911 7912 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7913 TokenType.UPDATE, advance=False 7914 ): 7915 update_options = self._prev.text.upper() 7916 self._advance() 7917 elif self._match_text_seq("USING", "DATA"): 7918 expression = self.expression(exp.UsingData, this=self._parse_string()) 7919 7920 return self.expression( 7921 exp.AnalyzeHistogram, 7922 this=this, 7923 expressions=expressions, 7924 expression=expression, 7925 update_options=update_options, 7926 ) 7927 7928 def _parse_merge(self) -> exp.Merge: 7929 self._match(TokenType.INTO) 7930 target = self._parse_table() 7931 7932 if target and self._match(TokenType.ALIAS, advance=False): 7933 target.set("alias", self._parse_table_alias()) 7934 7935 self._match(TokenType.USING) 7936 using = self._parse_table() 7937 7938 self._match(TokenType.ON) 7939 on = self._parse_assignment() 7940 7941 return self.expression( 7942 exp.Merge, 7943 this=target, 7944 using=using, 7945 on=on, 7946 whens=self._parse_when_matched(), 7947 returning=self._parse_returning(), 7948 ) 7949 7950 def _parse_when_matched(self) -> exp.Whens: 7951 whens = [] 7952 7953 while self._match(TokenType.WHEN): 7954 matched = not self._match(TokenType.NOT) 7955 self._match_text_seq("MATCHED") 7956 source = ( 7957 False 7958 if self._match_text_seq("BY", "TARGET") 7959 else self._match_text_seq("BY", "SOURCE") 7960 ) 7961 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7962 7963 self._match(TokenType.THEN) 7964 7965 if self._match(TokenType.INSERT): 7966 this = self._parse_star() 7967 if this: 7968 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7969 else: 7970 then = self.expression( 7971 exp.Insert, 7972 this=exp.var("ROW") 7973 if self._match_text_seq("ROW") 7974 else self._parse_value(values=False), 7975 expression=self._match_text_seq("VALUES") and self._parse_value(), 7976 ) 7977 elif self._match(TokenType.UPDATE): 7978 expressions = self._parse_star() 7979 if expressions: 7980 then = self.expression(exp.Update, expressions=expressions) 7981 else: 7982 then = self.expression( 7983 exp.Update, 7984 
expressions=self._match(TokenType.SET) 7985 and self._parse_csv(self._parse_equality), 7986 ) 7987 elif self._match(TokenType.DELETE): 7988 then = self.expression(exp.Var, this=self._prev.text) 7989 else: 7990 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7991 7992 whens.append( 7993 self.expression( 7994 exp.When, 7995 matched=matched, 7996 source=source, 7997 condition=condition, 7998 then=then, 7999 ) 8000 ) 8001 return self.expression(exp.Whens, expressions=whens) 8002 8003 def _parse_show(self) -> t.Optional[exp.Expression]: 8004 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8005 if parser: 8006 return parser(self) 8007 return self._parse_as_command(self._prev) 8008 8009 def _parse_set_item_assignment( 8010 self, kind: t.Optional[str] = None 8011 ) -> t.Optional[exp.Expression]: 8012 index = self._index 8013 8014 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8015 return self._parse_set_transaction(global_=kind == "GLOBAL") 8016 8017 left = self._parse_primary() or self._parse_column() 8018 assignment_delimiter = self._match_texts(("=", "TO")) 8019 8020 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8021 self._retreat(index) 8022 return None 8023 8024 right = self._parse_statement() or self._parse_id_var() 8025 if isinstance(right, (exp.Column, exp.Identifier)): 8026 right = exp.var(right.name) 8027 8028 this = self.expression(exp.EQ, this=left, expression=right) 8029 return self.expression(exp.SetItem, this=this, kind=kind) 8030 8031 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8032 self._match_text_seq("TRANSACTION") 8033 characteristics = self._parse_csv( 8034 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8035 ) 8036 return self.expression( 8037 exp.SetItem, 8038 expressions=characteristics, 8039 kind="TRANSACTION", 8040 **{"global": global_}, # type: ignore 8041 ) 8042 8043 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8044 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8045 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8046 8047 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8048 index = self._index 8049 set_ = self.expression( 8050 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8051 ) 8052 8053 if self._curr: 8054 self._retreat(index) 8055 return self._parse_as_command(self._prev) 8056 8057 return set_ 8058 8059 def _parse_var_from_options( 8060 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8061 ) -> t.Optional[exp.Var]: 8062 start = self._curr 8063 if not start: 8064 return None 8065 8066 option = start.text.upper() 8067 continuations = options.get(option) 8068 8069 index = self._index 8070 self._advance() 8071 for keywords in continuations or []: 8072 if isinstance(keywords, str): 8073 keywords = (keywords,) 8074 8075 if self._match_text_seq(*keywords): 8076 option = f"{option} {' '.join(keywords)}" 8077 break 8078 else: 8079 if continuations or continuations is None: 8080 if raise_unmatched: 8081 self.raise_error(f"Unknown option {option}") 8082 8083 self._retreat(index) 8084 return None 8085 8086 return exp.var(option) 8087 8088 def _parse_as_command(self, start: Token) -> exp.Command: 8089 while self._curr: 8090 self._advance() 8091 text = self._find_sql(start, self._prev) 8092 size = len(start.text) 8093 self._warn_unsupported() 8094 return exp.Command(this=text[:size], 
expression=text[size:]) 8095 8096 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8097 settings = [] 8098 8099 self._match_l_paren() 8100 kind = self._parse_id_var() 8101 8102 if self._match(TokenType.L_PAREN): 8103 while True: 8104 key = self._parse_id_var() 8105 value = self._parse_primary() 8106 if not key and value is None: 8107 break 8108 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8109 self._match(TokenType.R_PAREN) 8110 8111 self._match_r_paren() 8112 8113 return self.expression( 8114 exp.DictProperty, 8115 this=this, 8116 kind=kind.this if kind else None, 8117 settings=settings, 8118 ) 8119 8120 def _parse_dict_range(self, this: str) -> exp.DictRange: 8121 self._match_l_paren() 8122 has_min = self._match_text_seq("MIN") 8123 if has_min: 8124 min = self._parse_var() or self._parse_primary() 8125 self._match_text_seq("MAX") 8126 max = self._parse_var() or self._parse_primary() 8127 else: 8128 max = self._parse_var() or self._parse_primary() 8129 min = exp.Literal.number(0) 8130 self._match_r_paren() 8131 return self.expression(exp.DictRange, this=this, min=min, max=max) 8132 8133 def _parse_comprehension( 8134 self, this: t.Optional[exp.Expression] 8135 ) -> t.Optional[exp.Comprehension]: 8136 index = self._index 8137 expression = self._parse_column() 8138 if not self._match(TokenType.IN): 8139 self._retreat(index - 1) 8140 return None 8141 iterator = self._parse_column() 8142 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8143 return self.expression( 8144 exp.Comprehension, 8145 this=this, 8146 expression=expression, 8147 iterator=iterator, 8148 condition=condition, 8149 ) 8150 8151 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8152 if self._match(TokenType.HEREDOC_STRING): 8153 return self.expression(exp.Heredoc, this=self._prev.text) 8154 8155 if not self._match_text_seq("$"): 8156 return None 8157 8158 tags = ["$"] 8159 tag_text = None 8160 8161 if self._is_connected(): 8162 self._advance() 8163 tags.append(self._prev.text.upper()) 8164 else: 8165 self.raise_error("No closing $ found") 8166 8167 if tags[-1] != "$": 8168 if self._is_connected() and self._match_text_seq("$"): 8169 tag_text = tags[-1] 8170 tags.append("$") 8171 else: 8172 self.raise_error("No closing $ found") 8173 8174 heredoc_start = self._curr 8175 8176 while self._curr: 8177 if self._match_text_seq(*tags, advance=False): 8178 this = self._find_sql(heredoc_start, self._prev) 8179 self._advance(len(tags)) 8180 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8181 8182 self._advance() 8183 8184 self.raise_error(f"No closing {''.join(tags)} found") 8185 return None 8186 8187 def _find_parser( 8188 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8189 ) -> t.Optional[t.Callable]: 8190 if not self._curr: 8191 return None 8192 8193 index = self._index 8194 this = [] 8195 while True: 8196 # The current token might be multiple words 8197 curr = self._curr.text.upper() 8198 key = curr.split(" ") 8199 this.append(curr) 8200 8201 self._advance() 8202 result, trie = in_trie(trie, key) 8203 if result == TrieResult.FAILED: 8204 break 8205 8206 if result == TrieResult.EXISTS: 8207 subparser = parsers[" ".join(this)] 8208 return subparser 8209 8210 self._retreat(index) 8211 return None 8212 8213 def _match(self, token_type, advance=True, expression=None): 8214 if not self._curr: 8215 return None 8216 8217 if self._curr.token_type == token_type: 8218 if advance: 8219 self._advance() 8220 self._add_comments(expression) 8221 return 
True 8222 8223 return None 8224 8225 def _match_set(self, types, advance=True): 8226 if not self._curr: 8227 return None 8228 8229 if self._curr.token_type in types: 8230 if advance: 8231 self._advance() 8232 return True 8233 8234 return None 8235 8236 def _match_pair(self, token_type_a, token_type_b, advance=True): 8237 if not self._curr or not self._next: 8238 return None 8239 8240 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8241 if advance: 8242 self._advance(2) 8243 return True 8244 8245 return None 8246 8247 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8248 if not self._match(TokenType.L_PAREN, expression=expression): 8249 self.raise_error("Expecting (") 8250 8251 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8252 if not self._match(TokenType.R_PAREN, expression=expression): 8253 self.raise_error("Expecting )") 8254 8255 def _match_texts(self, texts, advance=True): 8256 if ( 8257 self._curr 8258 and self._curr.token_type != TokenType.STRING 8259 and self._curr.text.upper() in texts 8260 ): 8261 if advance: 8262 self._advance() 8263 return True 8264 return None 8265 8266 def _match_text_seq(self, *texts, advance=True): 8267 index = self._index 8268 for text in texts: 8269 if ( 8270 self._curr 8271 and self._curr.token_type != TokenType.STRING 8272 and self._curr.text.upper() == text 8273 ): 8274 self._advance() 8275 else: 8276 self._retreat(index) 8277 return None 8278 8279 if not advance: 8280 self._retreat(index) 8281 8282 return True 8283 8284 def _replace_lambda( 8285 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8286 ) -> t.Optional[exp.Expression]: 8287 if not node: 8288 return node 8289 8290 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8291 8292 for column in node.find_all(exp.Column): 8293 typ = lambda_types.get(column.parts[0].name) 8294 if typ is not None: 8295 dot_or_id = column.to_dot() if column.table else column.this 8296 8297 if typ: 8298 dot_or_id = self.expression( 8299 exp.Cast, 8300 this=dot_or_id, 8301 to=typ, 8302 ) 8303 8304 parent = column.parent 8305 8306 while isinstance(parent, exp.Dot): 8307 if not isinstance(parent.parent, exp.Dot): 8308 parent.replace(dot_or_id) 8309 break 8310 parent = parent.parent 8311 else: 8312 if column is node: 8313 node = dot_or_id 8314 else: 8315 column.replace(dot_or_id) 8316 return node 8317 8318 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8319 start = self._prev 8320 8321 # Not to be confused with TRUNCATE(number, decimals) function call 8322 if self._match(TokenType.L_PAREN): 8323 self._retreat(self._index - 2) 8324 return self._parse_function() 8325 8326 # Clickhouse supports TRUNCATE DATABASE as well 8327 is_database = self._match(TokenType.DATABASE) 8328 8329 self._match(TokenType.TABLE) 8330 8331 exists = self._parse_exists(not_=False) 8332 8333 expressions = self._parse_csv( 8334 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8335 ) 8336 8337 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8338 8339 if self._match_text_seq("RESTART", "IDENTITY"): 8340 identity = "RESTART" 8341 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8342 identity = "CONTINUE" 8343 else: 8344 identity = None 8345 8346 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8347 option = self._prev.text 8348 else: 8349 option = None 8350 8351 partition = self._parse_partition() 
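        # Editor's sketch (hedged; assumes sqlglot.parse_one): the statement form
        # parses into exp.TruncateTable, while the parenthesized numeric form, e.g.
        # TRUNCATE(1.23, 1), is re-dispatched to the function branch at the top of
        # this method:
        #
        #     import sqlglot
        #     from sqlglot import exp
        #
        #     assert isinstance(sqlglot.parse_one("TRUNCATE TABLE a, b"), exp.TruncateTable)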
8352 8353 # Fallback case 8354 if self._curr: 8355 return self._parse_as_command(start) 8356 8357 return self.expression( 8358 exp.TruncateTable, 8359 expressions=expressions, 8360 is_database=is_database, 8361 exists=exists, 8362 cluster=cluster, 8363 identity=identity, 8364 option=option, 8365 partition=partition, 8366 ) 8367 8368 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8369 this = self._parse_ordered(self._parse_opclass) 8370 8371 if not self._match(TokenType.WITH): 8372 return this 8373 8374 op = self._parse_var(any_token=True) 8375 8376 return self.expression(exp.WithOperator, this=this, op=op) 8377 8378 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8379 self._match(TokenType.EQ) 8380 self._match(TokenType.L_PAREN) 8381 8382 opts: t.List[t.Optional[exp.Expression]] = [] 8383 option: exp.Expression | None 8384 while self._curr and not self._match(TokenType.R_PAREN): 8385 if self._match_text_seq("FORMAT_NAME", "="): 8386 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8387 option = self._parse_format_name() 8388 else: 8389 option = self._parse_property() 8390 8391 if option is None: 8392 self.raise_error("Unable to parse option") 8393 break 8394 8395 opts.append(option) 8396 8397 return opts 8398 8399 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8400 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8401 8402 options = [] 8403 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8404 option = self._parse_var(any_token=True) 8405 prev = self._prev.text.upper() 8406 8407 # Different dialects might separate options and values by white space, "=" and "AS" 8408 self._match(TokenType.EQ) 8409 self._match(TokenType.ALIAS) 8410 8411 param = self.expression(exp.CopyParameter, this=option) 8412 8413 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8414 TokenType.L_PAREN, advance=False 8415 ): 8416 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8417 param.set("expressions", self._parse_wrapped_options()) 8418 elif prev == "FILE_FORMAT": 8419 # T-SQL's external file format case 8420 param.set("expression", self._parse_field()) 8421 else: 8422 param.set("expression", self._parse_unquoted_field()) 8423 8424 options.append(param) 8425 self._match(sep) 8426 8427 return options 8428 8429 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8430 expr = self.expression(exp.Credentials) 8431 8432 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8433 expr.set("storage", self._parse_field()) 8434 if self._match_text_seq("CREDENTIALS"): 8435 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8436 creds = ( 8437 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8438 ) 8439 expr.set("credentials", creds) 8440 if self._match_text_seq("ENCRYPTION"): 8441 expr.set("encryption", self._parse_wrapped_options()) 8442 if self._match_text_seq("IAM_ROLE"): 8443 expr.set("iam_role", self._parse_field()) 8444 if self._match_text_seq("REGION"): 8445 expr.set("region", self._parse_field()) 8446 8447 return expr 8448 8449 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8450 return self._parse_field() 8451 8452 def _parse_copy(self) -> exp.Copy | exp.Command: 8453 start = self._prev 8454 8455 self._match(TokenType.INTO) 8456 8457 this = ( 8458 self._parse_select(nested=True, parse_subquery_alias=False) 8459 if self._match(TokenType.L_PAREN, advance=False) 8460 else self._parse_table(schema=True) 
8461 ) 8462 8463 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8464 8465 files = self._parse_csv(self._parse_file_location) 8466 if self._match(TokenType.EQ, advance=False): 8467 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8468 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8469 # list via `_parse_wrapped(..)` below. 8470 self._advance(-1) 8471 files = [] 8472 8473 credentials = self._parse_credentials() 8474 8475 self._match_text_seq("WITH") 8476 8477 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8478 8479 # Fallback case 8480 if self._curr: 8481 return self._parse_as_command(start) 8482 8483 return self.expression( 8484 exp.Copy, 8485 this=this, 8486 kind=kind, 8487 credentials=credentials, 8488 files=files, 8489 params=params, 8490 ) 8491 8492 def _parse_normalize(self) -> exp.Normalize: 8493 return self.expression( 8494 exp.Normalize, 8495 this=self._parse_bitwise(), 8496 form=self._match(TokenType.COMMA) and self._parse_var(), 8497 ) 8498 8499 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8500 args = self._parse_csv(lambda: self._parse_lambda()) 8501 8502 this = seq_get(args, 0) 8503 decimals = seq_get(args, 1) 8504 8505 return expr_type( 8506 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8507 ) 8508 8509 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8510 star_token = self._prev 8511 8512 if self._match_text_seq("COLUMNS", "(", advance=False): 8513 this = self._parse_function() 8514 if isinstance(this, exp.Columns): 8515 this.set("unpack", True) 8516 return this 8517 8518 return self.expression( 8519 exp.Star, 8520 **{ # type: ignore 8521 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8522 "replace": self._parse_star_op("REPLACE"), 8523 "rename": self._parse_star_op("RENAME"), 8524 }, 8525 ).update_positions(star_token) 8526 8527 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8528 privilege_parts = [] 8529 8530 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8531 # (end of privilege list) or L_PAREN (start of column list) are met 8532 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8533 privilege_parts.append(self._curr.text.upper()) 8534 self._advance() 8535 8536 this = exp.var(" ".join(privilege_parts)) 8537 expressions = ( 8538 self._parse_wrapped_csv(self._parse_column) 8539 if self._match(TokenType.L_PAREN, advance=False) 8540 else None 8541 ) 8542 8543 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8544 8545 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8546 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8547 principal = self._parse_id_var() 8548 8549 if not principal: 8550 return None 8551 8552 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8553 8554 def _parse_grant_revoke_common( 8555 self, 8556 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8557 privileges = self._parse_csv(self._parse_grant_privilege) 8558 8559 self._match(TokenType.ON) 8560 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8561 8562 # Attempt to parse the securable e.g. 
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        return privileges, kind, securable

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_revoke(self) -> exp.Revoke | exp.Command:
        start = self._prev

        grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("FROM"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        cascade = None
        if self._match_texts(("CASCADE", "RESTRICT")):
            cascade = self._prev.text.upper()

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Revoke,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
            cascade=cascade,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression

    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: t.List[exp.Expression],
        alias_cte: t.Optional[exp.TableAlias] = None,
    ) -> exp.Select:
        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
        if alias_cte:
            new_cte = alias_cte
        else:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
        if ctes:
            new_select.set("with", ctes)

        return new_select.with_(new_cte, as_=query, copy=False)

    def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
        select = self._parse_select(consume_pipe=False)
        if not select:
            return query

        return self._build_pipe_cte(
            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
        )

    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()
        if self._match_text_seq("GROUP", "AND", advance=False):
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from")
        if from_:
            from_.this.set("pivots", pivots)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        sample = self._parse_table_sample()

        with_ = query.args.get("with")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query

    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc.) and the JOIN operator share a few common
                # starting keywords, making it tricky to disambiguate them without lookahead.
                # The approach here is to first try to parse a set operation and, if that fails,
                # to try to parse a join operator. If that fails as well, the operator is not
                # supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query

    def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        return self.expression(
            exp.DeclareItem,
            this=vars,
            kind=self._parse_types(),
            default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
        )

    def _parse_declare(self) -> exp.Declare | exp.Command:
        start = self._prev
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare, expressions=expressions)

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class, **kwargs)

    def _parse_json_value(self) -> exp.JSONValue:
        this = self._parse_bitwise()
        self._match(TokenType.COMMA)
        path = self._parse_bitwise()

        returning = self._match(TokenType.RETURNING) and self._parse_type()

        return self.expression(
            exp.JSONValue,
            this=this,
            path=self.dialect.to_json_path(path),
            returning=returning,
            on_condition=self._parse_on_condition(),
        )

    def _parse_group_concat(self) -> t.Optional[exp.Expression]:
        def concat_exprs(
            node: t.Optional[exp.Expression], exprs: t.List[exp.Expression]
        ) -> exp.Expression:
            if isinstance(node, exp.Distinct) and len(node.expressions) > 1:
                concat_exprs = [
                    self.expression(exp.Concat, expressions=node.expressions, safe=True)
                ]
                node.set("expressions", concat_exprs)
                return node
            if len(exprs) == 1:
                return exprs[0]
            return self.expression(exp.Concat, expressions=args, safe=True)

        args = self._parse_csv(self._parse_lambda)

        if args:
            order = args[-1] if isinstance(args[-1], exp.Order) else None

            if order:
                # The ORDER BY is the last (or only) expression in the list and has consumed
                # the 'expr' before it, so remove 'expr' from exp.Order and add it back to args
                args[-1] = order.this
                order.set("this", concat_exprs(order.this, args))

            this = order or concat_exprs(args[0], args)
        else:
            this = None

        separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None

        return self.expression(exp.GroupConcat, this=this, separator=separator)
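
# ---------------------------------------------------------------------------
# Illustrative usage sketches (editorial additions; not part of the module
# source above). Each one exercises a parsing path defined in this file via
# the public sqlglot API (`parse_one`, `transpile`). Exact ASTs and rendered
# SQL can vary between sqlglot releases, so treat the expected outputs as
# approximations rather than guarantees.
# ---------------------------------------------------------------------------

# Sketch 1: `_parse_copy`, `_parse_copy_parameters` and `_parse_credentials`
# drive COPY INTO parsing. Assuming a Snowflake-style statement with
# placeholder table/stage names, a well-formed statement should come back as
# exp.Copy, with the wrapped FILE_FORMAT options stored as CopyParameter
# expressions; anything the parser can't fully model falls back to exp.Command.
import sqlglot
from sqlglot import exp

copy_stmt = sqlglot.parse_one(
    "COPY INTO my_table FROM @my_stage FILE_FORMAT = (TYPE = CSV SKIP_HEADER = 1)",
    read="snowflake",
)
print(type(copy_stmt).__name__)  # expected: Copy
print(copy_stmt.sql(dialect="snowflake"))  # should round-trip the statement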
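
# Sketch 2: `_parse_grant` / `_parse_revoke` produce structured exp.Grant /
# exp.Revoke nodes for well-formed statements, and deliberately degrade to an
# opaque exp.Command when the securable can't be parsed (e.g. the MySQL
# "foo.*" / "*.*" wildcards noted in `_parse_grant_revoke_common`). The
# object and principal names below are placeholders.
import sqlglot

grant = sqlglot.parse_one("GRANT SELECT, INSERT ON TABLE db.t TO ROLE analyst", read="snowflake")
print(type(grant).__name__)  # expected: Grant

wildcard = sqlglot.parse_one("GRANT SELECT ON db.* TO 'app'@'%'", read="mysql")
print(type(wildcard).__name__)  # expected: Command (wildcard securable)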
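
# Sketch 3: `_parse_star_ops` attaches EXCEPT/REPLACE/RENAME modifiers to the
# exp.Star node, which is what makes star transpilation possible, e.g. from
# BigQuery's EXCEPT to DuckDB's EXCLUDE. Column names are placeholders:
import sqlglot

sql = "SELECT * EXCEPT (a, b) REPLACE (c + 1 AS c) FROM t"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
# likely: SELECT * EXCLUDE (a, b) REPLACE (c + 1 AS c) FROM t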
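
# Sketch 4: `_parse_group_concat` folds DISTINCT, a trailing ORDER BY and the
# SEPARATOR clause into a single exp.GroupConcat node, so MySQL's
# GROUP_CONCAT can be rewritten as STRING_AGG for other engines:
import sqlglot

sql = "SELECT GROUP_CONCAT(DISTINCT name ORDER BY name SEPARATOR ', ') FROM users"
print(sqlglot.transpile(sql, read="mysql", write="postgres")[0])
# roughly: SELECT STRING_AGG(DISTINCT name, ', ' ORDER BY name) FROM users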
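
# Sketch 5: the `_parse_pipe_syntax_*` family lowers pipe queries (the `|>`
# operator) into ordinary SELECTs chained through the synthetic `__tmp<N>`
# CTEs built by `_build_pipe_cte`. Assuming a release where pipe syntax is
# enabled for BigQuery:
import sqlglot

sql = """
FROM orders
|> WHERE amount > 10
|> AGGREGATE SUM(amount) AS total GROUP BY customer_id
"""
print(sqlglot.parse_one(sql, read="bigquery").sql(dialect="duckdb"))
# expected shape: WITH __tmp1 AS (SELECT ...), ... SELECT * FROM __tmp<N>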
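
# Sketch 6: `_parse_max_min_by` normalizes MAX_BY/MIN_BY (a.k.a.
# ARG_MAX/ARG_MIN) calls, including an optional leading DISTINCT and a third
# "count" argument. With DuckDB as the source dialect:
import sqlglot
from sqlglot import exp

node = sqlglot.parse_one("SELECT MAX_BY(name, score) FROM results", read="duckdb")
print(node.find(exp.ArgMax) is not None)  # expected: True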
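
# Sketch 7: `_parse_overlay` handles the standard OVERLAY(.. PLACING .. FROM
# .. FOR ..) syntax, mapping each keyword-delimited operand onto exp.Overlay:
import sqlglot
from sqlglot import exp

node = sqlglot.parse_one(
    "SELECT OVERLAY('Txxxxas' PLACING 'hom' FROM 2 FOR 4)", read="postgres"
)
print(node.find(exp.Overlay).sql(dialect="postgres"))
# expected: OVERLAY('Txxxxas' PLACING 'hom' FROM 2 FOR 4)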
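
# Sketch 8: `_parse_declare` builds exp.Declare/exp.DeclareItem for dialects
# that route DECLARE through it, and falls back to exp.Command for anything
# it can't fully model. A T-SQL sketch with a placeholder variable:
import sqlglot

decl = sqlglot.parse_one("DECLARE @x INT = 5", read="tsql")
print(type(decl).__name__)  # expected: Declare (possibly Command on older releases)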
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 
TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 TokenType.SESSION, 484 } 485 486 # Tokens that can represent identifiers 487 ID_VAR_TOKENS = { 488 TokenType.ALL, 489 TokenType.ATTACH, 490 TokenType.VAR, 491 TokenType.ANTI, 492 TokenType.APPLY, 493 TokenType.ASC, 494 TokenType.ASOF, 495 TokenType.AUTO_INCREMENT, 496 TokenType.BEGIN, 497 TokenType.BPCHAR, 498 TokenType.CACHE, 499 TokenType.CASE, 500 TokenType.COLLATE, 501 TokenType.COMMAND, 502 TokenType.COMMENT, 503 TokenType.COMMIT, 504 TokenType.CONSTRAINT, 505 TokenType.COPY, 506 TokenType.CUBE, 507 TokenType.CURRENT_SCHEMA, 508 TokenType.DEFAULT, 509 TokenType.DELETE, 510 TokenType.DESC, 511 TokenType.DESCRIBE, 512 TokenType.DETACH, 513 TokenType.DICTIONARY, 514 TokenType.DIV, 515 TokenType.END, 516 TokenType.EXECUTE, 517 TokenType.EXPORT, 518 TokenType.ESCAPE, 519 TokenType.FALSE, 520 TokenType.FIRST, 521 TokenType.FILTER, 522 TokenType.FINAL, 523 TokenType.FORMAT, 524 TokenType.FULL, 525 TokenType.GET, 526 TokenType.IDENTIFIER, 527 TokenType.IS, 528 TokenType.ISNULL, 529 TokenType.INTERVAL, 530 TokenType.KEEP, 531 TokenType.KILL, 532 TokenType.LEFT, 533 TokenType.LIMIT, 534 TokenType.LOAD, 535 TokenType.LOCK, 536 TokenType.MERGE, 537 TokenType.NATURAL, 538 TokenType.NEXT, 539 TokenType.OFFSET, 540 TokenType.OPERATOR, 541 TokenType.ORDINALITY, 542 TokenType.OVERLAPS, 543 TokenType.OVERWRITE, 544 TokenType.PARTITION, 545 TokenType.PERCENT, 546 TokenType.PIVOT, 547 TokenType.PRAGMA, 548 TokenType.PUT, 549 TokenType.RANGE, 550 TokenType.RECURSIVE, 551 TokenType.REFERENCES, 552 TokenType.REFRESH, 553 TokenType.RENAME, 554 TokenType.REPLACE, 555 TokenType.RIGHT, 556 TokenType.ROLLUP, 557 TokenType.ROW, 558 TokenType.ROWS, 559 TokenType.SEMI, 560 TokenType.SET, 561 TokenType.SETTINGS, 562 TokenType.SHOW, 563 TokenType.TEMPORARY, 564 TokenType.TOP, 565 
TokenType.TRUE, 566 TokenType.TRUNCATE, 567 TokenType.UNIQUE, 568 TokenType.UNNEST, 569 TokenType.UNPIVOT, 570 TokenType.UPDATE, 571 TokenType.USE, 572 TokenType.VOLATILE, 573 TokenType.WINDOW, 574 *ALTERABLES, 575 *CREATABLES, 576 *SUBQUERY_PREDICATES, 577 *TYPE_TOKENS, 578 *NO_PAREN_FUNCTIONS, 579 } 580 ID_VAR_TOKENS.remove(TokenType.UNION) 581 582 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 583 TokenType.ANTI, 584 TokenType.ASOF, 585 TokenType.FULL, 586 TokenType.LEFT, 587 TokenType.LOCK, 588 TokenType.NATURAL, 589 TokenType.RIGHT, 590 TokenType.SEMI, 591 TokenType.WINDOW, 592 } 593 594 ALIAS_TOKENS = ID_VAR_TOKENS 595 596 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 597 598 ARRAY_CONSTRUCTORS = { 599 "ARRAY": exp.Array, 600 "LIST": exp.List, 601 } 602 603 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 604 605 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 606 607 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 608 609 FUNC_TOKENS = { 610 TokenType.COLLATE, 611 TokenType.COMMAND, 612 TokenType.CURRENT_DATE, 613 TokenType.CURRENT_DATETIME, 614 TokenType.CURRENT_SCHEMA, 615 TokenType.CURRENT_TIMESTAMP, 616 TokenType.CURRENT_TIME, 617 TokenType.CURRENT_USER, 618 TokenType.FILTER, 619 TokenType.FIRST, 620 TokenType.FORMAT, 621 TokenType.GET, 622 TokenType.GLOB, 623 TokenType.IDENTIFIER, 624 TokenType.INDEX, 625 TokenType.ISNULL, 626 TokenType.ILIKE, 627 TokenType.INSERT, 628 TokenType.LIKE, 629 TokenType.MERGE, 630 TokenType.NEXT, 631 TokenType.OFFSET, 632 TokenType.PRIMARY_KEY, 633 TokenType.RANGE, 634 TokenType.REPLACE, 635 TokenType.RLIKE, 636 TokenType.ROW, 637 TokenType.UNNEST, 638 TokenType.VAR, 639 TokenType.LEFT, 640 TokenType.RIGHT, 641 TokenType.SEQUENCE, 642 TokenType.DATE, 643 TokenType.DATETIME, 644 TokenType.TABLE, 645 TokenType.TIMESTAMP, 646 TokenType.TIMESTAMPTZ, 647 TokenType.TRUNCATE, 648 TokenType.UTC_DATE, 649 TokenType.UTC_TIME, 650 TokenType.UTC_TIMESTAMP, 651 TokenType.WINDOW, 652 TokenType.XOR, 653 *TYPE_TOKENS, 654 *SUBQUERY_PREDICATES, 655 } 656 657 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 658 TokenType.AND: exp.And, 659 } 660 661 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 662 TokenType.COLON_EQ: exp.PropertyEQ, 663 } 664 665 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 666 TokenType.OR: exp.Or, 667 } 668 669 EQUALITY = { 670 TokenType.EQ: exp.EQ, 671 TokenType.NEQ: exp.NEQ, 672 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 673 } 674 675 COMPARISON = { 676 TokenType.GT: exp.GT, 677 TokenType.GTE: exp.GTE, 678 TokenType.LT: exp.LT, 679 TokenType.LTE: exp.LTE, 680 } 681 682 BITWISE = { 683 TokenType.AMP: exp.BitwiseAnd, 684 TokenType.CARET: exp.BitwiseXor, 685 TokenType.PIPE: exp.BitwiseOr, 686 } 687 688 TERM = { 689 TokenType.DASH: exp.Sub, 690 TokenType.PLUS: exp.Add, 691 TokenType.MOD: exp.Mod, 692 TokenType.COLLATE: exp.Collate, 693 } 694 695 FACTOR = { 696 TokenType.DIV: exp.IntDiv, 697 TokenType.LR_ARROW: exp.Distance, 698 TokenType.SLASH: exp.Div, 699 TokenType.STAR: exp.Mul, 700 } 701 702 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 703 704 TIMES = { 705 TokenType.TIME, 706 TokenType.TIMETZ, 707 } 708 709 TIMESTAMPS = { 710 TokenType.TIMESTAMP, 711 TokenType.TIMESTAMPNTZ, 712 TokenType.TIMESTAMPTZ, 713 TokenType.TIMESTAMPLTZ, 714 *TIMES, 715 } 716 717 SET_OPERATIONS = { 718 TokenType.UNION, 719 TokenType.INTERSECT, 720 TokenType.EXCEPT, 721 } 722 723 JOIN_METHODS = { 724 TokenType.ASOF, 725 TokenType.NATURAL, 726 TokenType.POSITIONAL, 727 } 728 729 JOIN_SIDES = { 730 TokenType.LEFT, 731 
TokenType.RIGHT, 732 TokenType.FULL, 733 } 734 735 JOIN_KINDS = { 736 TokenType.ANTI, 737 TokenType.CROSS, 738 TokenType.INNER, 739 TokenType.OUTER, 740 TokenType.SEMI, 741 TokenType.STRAIGHT_JOIN, 742 } 743 744 JOIN_HINTS: t.Set[str] = set() 745 746 LAMBDAS = { 747 TokenType.ARROW: lambda self, expressions: self.expression( 748 exp.Lambda, 749 this=self._replace_lambda( 750 self._parse_assignment(), 751 expressions, 752 ), 753 expressions=expressions, 754 ), 755 TokenType.FARROW: lambda self, expressions: self.expression( 756 exp.Kwarg, 757 this=exp.var(expressions[0].name), 758 expression=self._parse_assignment(), 759 ), 760 } 761 762 COLUMN_OPERATORS = { 763 TokenType.DOT: None, 764 TokenType.DOTCOLON: lambda self, this, to: self.expression( 765 exp.JSONCast, 766 this=this, 767 to=to, 768 ), 769 TokenType.DCOLON: lambda self, this, to: self.build_cast( 770 strict=self.STRICT_CAST, this=this, to=to 771 ), 772 TokenType.ARROW: lambda self, this, path: self.expression( 773 exp.JSONExtract, 774 this=this, 775 expression=self.dialect.to_json_path(path), 776 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 777 ), 778 TokenType.DARROW: lambda self, this, path: self.expression( 779 exp.JSONExtractScalar, 780 this=this, 781 expression=self.dialect.to_json_path(path), 782 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 783 ), 784 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 785 exp.JSONBExtract, 786 this=this, 787 expression=path, 788 ), 789 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 790 exp.JSONBExtractScalar, 791 this=this, 792 expression=path, 793 ), 794 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 795 exp.JSONBContains, 796 this=this, 797 expression=key, 798 ), 799 } 800 801 CAST_COLUMN_OPERATORS = { 802 TokenType.DOTCOLON, 803 TokenType.DCOLON, 804 } 805 806 EXPRESSION_PARSERS = { 807 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 808 exp.Column: lambda self: self._parse_column(), 809 exp.Condition: lambda self: self._parse_assignment(), 810 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 811 exp.Expression: lambda self: self._parse_expression(), 812 exp.From: lambda self: self._parse_from(joins=True), 813 exp.GrantPrincipal: lambda self: self._parse_grant_principal(), 814 exp.GrantPrivilege: lambda self: self._parse_grant_privilege(), 815 exp.Group: lambda self: self._parse_group(), 816 exp.Having: lambda self: self._parse_having(), 817 exp.Hint: lambda self: self._parse_hint_body(), 818 exp.Identifier: lambda self: self._parse_id_var(), 819 exp.Join: lambda self: self._parse_join(), 820 exp.Lambda: lambda self: self._parse_lambda(), 821 exp.Lateral: lambda self: self._parse_lateral(), 822 exp.Limit: lambda self: self._parse_limit(), 823 exp.Offset: lambda self: self._parse_offset(), 824 exp.Order: lambda self: self._parse_order(), 825 exp.Ordered: lambda self: self._parse_ordered(), 826 exp.Properties: lambda self: self._parse_properties(), 827 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 828 exp.Qualify: lambda self: self._parse_qualify(), 829 exp.Returning: lambda self: self._parse_returning(), 830 exp.Select: lambda self: self._parse_select(), 831 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 832 exp.Table: lambda self: self._parse_table_parts(), 833 exp.TableAlias: lambda self: self._parse_table_alias(), 834 exp.Tuple: lambda self: self._parse_value(values=False), 835 exp.Whens: lambda self: 
self._parse_when_matched(), 836 exp.Where: lambda self: self._parse_where(), 837 exp.Window: lambda self: self._parse_named_window(), 838 exp.With: lambda self: self._parse_with(), 839 "JOIN_TYPE": lambda self: self._parse_join_parts(), 840 } 841 842 STATEMENT_PARSERS = { 843 TokenType.ALTER: lambda self: self._parse_alter(), 844 TokenType.ANALYZE: lambda self: self._parse_analyze(), 845 TokenType.BEGIN: lambda self: self._parse_transaction(), 846 TokenType.CACHE: lambda self: self._parse_cache(), 847 TokenType.COMMENT: lambda self: self._parse_comment(), 848 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 849 TokenType.COPY: lambda self: self._parse_copy(), 850 TokenType.CREATE: lambda self: self._parse_create(), 851 TokenType.DELETE: lambda self: self._parse_delete(), 852 TokenType.DESC: lambda self: self._parse_describe(), 853 TokenType.DESCRIBE: lambda self: self._parse_describe(), 854 TokenType.DROP: lambda self: self._parse_drop(), 855 TokenType.GRANT: lambda self: self._parse_grant(), 856 TokenType.REVOKE: lambda self: self._parse_revoke(), 857 TokenType.INSERT: lambda self: self._parse_insert(), 858 TokenType.KILL: lambda self: self._parse_kill(), 859 TokenType.LOAD: lambda self: self._parse_load(), 860 TokenType.MERGE: lambda self: self._parse_merge(), 861 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 862 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 863 TokenType.REFRESH: lambda self: self._parse_refresh(), 864 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 865 TokenType.SET: lambda self: self._parse_set(), 866 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 867 TokenType.UNCACHE: lambda self: self._parse_uncache(), 868 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 869 TokenType.UPDATE: lambda self: self._parse_update(), 870 TokenType.USE: lambda self: self._parse_use(), 871 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 872 } 873 874 UNARY_PARSERS = { 875 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 876 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 877 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 878 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 879 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 880 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 881 } 882 883 STRING_PARSERS = { 884 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 885 exp.RawString, this=token.text 886 ), 887 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 888 exp.National, this=token.text 889 ), 890 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 891 TokenType.STRING: lambda self, token: self.expression( 892 exp.Literal, this=token.text, is_string=True 893 ), 894 TokenType.UNICODE_STRING: lambda self, token: self.expression( 895 exp.UnicodeString, 896 this=token.text, 897 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 898 ), 899 } 900 901 NUMERIC_PARSERS = { 902 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 903 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 904 TokenType.HEX_STRING: lambda self, token: self.expression( 905 exp.HexString, 906 
this=token.text, 907 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 908 ), 909 TokenType.NUMBER: lambda self, token: self.expression( 910 exp.Literal, this=token.text, is_string=False 911 ), 912 } 913 914 PRIMARY_PARSERS = { 915 **STRING_PARSERS, 916 **NUMERIC_PARSERS, 917 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 918 TokenType.NULL: lambda self, _: self.expression(exp.Null), 919 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 920 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 921 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 922 TokenType.STAR: lambda self, _: self._parse_star_ops(), 923 } 924 925 PLACEHOLDER_PARSERS = { 926 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 927 TokenType.PARAMETER: lambda self: self._parse_parameter(), 928 TokenType.COLON: lambda self: ( 929 self.expression(exp.Placeholder, this=self._prev.text) 930 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 931 else None 932 ), 933 } 934 935 RANGE_PARSERS = { 936 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 937 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 938 TokenType.GLOB: binary_range_parser(exp.Glob), 939 TokenType.ILIKE: binary_range_parser(exp.ILike), 940 TokenType.IN: lambda self, this: self._parse_in(this), 941 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 942 TokenType.IS: lambda self, this: self._parse_is(this), 943 TokenType.LIKE: binary_range_parser(exp.Like), 944 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 945 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 946 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 947 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 948 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 949 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 950 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 951 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 952 } 953 954 PIPE_SYNTAX_TRANSFORM_PARSERS = { 955 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 956 "AS": lambda self, query: self._build_pipe_cte( 957 query, [exp.Star()], self._parse_table_alias() 958 ), 959 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 960 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 961 "ORDER BY": lambda self, query: query.order_by( 962 self._parse_order(), append=False, copy=False 963 ), 964 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 965 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 966 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 967 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 968 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 969 } 970 971 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 972 "ALLOWED_VALUES": lambda self: self.expression( 973 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 974 ), 975 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 976 "AUTO": lambda self: self._parse_auto_property(), 977 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 978 "BACKUP": lambda self: self.expression( 979 exp.BackupProperty, this=self._parse_var(any_token=True) 980 ), 981 
"BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 982 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 983 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 984 "CHECKSUM": lambda self: self._parse_checksum(), 985 "CLUSTER BY": lambda self: self._parse_cluster(), 986 "CLUSTERED": lambda self: self._parse_clustered_by(), 987 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 988 exp.CollateProperty, **kwargs 989 ), 990 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 991 "CONTAINS": lambda self: self._parse_contains_property(), 992 "COPY": lambda self: self._parse_copy_property(), 993 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 994 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 995 "DEFINER": lambda self: self._parse_definer(), 996 "DETERMINISTIC": lambda self: self.expression( 997 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 998 ), 999 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 1000 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 1001 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 1002 "DISTKEY": lambda self: self._parse_distkey(), 1003 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1004 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 1005 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1006 "ENVIRONMENT": lambda self: self.expression( 1007 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1008 ), 1009 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1010 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1011 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1012 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1013 "FREESPACE": lambda self: self._parse_freespace(), 1014 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1015 "HEAP": lambda self: self.expression(exp.HeapProperty), 1016 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1017 "IMMUTABLE": lambda self: self.expression( 1018 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1019 ), 1020 "INHERITS": lambda self: self.expression( 1021 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1022 ), 1023 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1024 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1025 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1026 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1027 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1028 "LIKE": lambda self: self._parse_create_like(), 1029 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1030 "LOCK": lambda self: self._parse_locking(), 1031 "LOCKING": lambda self: self._parse_locking(), 1032 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1033 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1034 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1035 "MODIFIES": lambda self: self._parse_modifies_property(), 1036 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1037 "NO": lambda self: self._parse_no_property(), 1038 
"ON": lambda self: self._parse_on_property(), 1039 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1040 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1041 "PARTITION": lambda self: self._parse_partitioned_of(), 1042 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1043 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1044 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1045 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1046 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1047 "READS": lambda self: self._parse_reads_property(), 1048 "REMOTE": lambda self: self._parse_remote_with_connection(), 1049 "RETURNS": lambda self: self._parse_returns(), 1050 "STRICT": lambda self: self.expression(exp.StrictProperty), 1051 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1052 "ROW": lambda self: self._parse_row(), 1053 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1054 "SAMPLE": lambda self: self.expression( 1055 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1056 ), 1057 "SECURE": lambda self: self.expression(exp.SecureProperty), 1058 "SECURITY": lambda self: self._parse_security(), 1059 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1060 "SETTINGS": lambda self: self._parse_settings_property(), 1061 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1062 "SORTKEY": lambda self: self._parse_sortkey(), 1063 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1064 "STABLE": lambda self: self.expression( 1065 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1066 ), 1067 "STORED": lambda self: self._parse_stored(), 1068 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1069 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1070 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1071 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1072 "TO": lambda self: self._parse_to_table(), 1073 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1074 "TRANSFORM": lambda self: self.expression( 1075 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1076 ), 1077 "TTL": lambda self: self._parse_ttl(), 1078 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1079 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1080 "VOLATILE": lambda self: self._parse_volatile_property(), 1081 "WITH": lambda self: self._parse_with_property(), 1082 } 1083 1084 CONSTRAINT_PARSERS = { 1085 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1086 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1087 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1088 "CHARACTER SET": lambda self: self.expression( 1089 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1090 ), 1091 "CHECK": lambda self: self.expression( 1092 exp.CheckColumnConstraint, 1093 this=self._parse_wrapped(self._parse_assignment), 1094 enforced=self._match_text_seq("ENFORCED"), 1095 ), 1096 "COLLATE": lambda self: self.expression( 1097 exp.CollateColumnConstraint, 1098 this=self._parse_identifier() or self._parse_column(), 1099 ), 1100 "COMMENT": lambda self: self.expression( 1101 exp.CommentColumnConstraint, this=self._parse_string() 1102 ), 1103 "COMPRESS": 
lambda self: self._parse_compress(), 1104 "CLUSTERED": lambda self: self.expression( 1105 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1106 ), 1107 "NONCLUSTERED": lambda self: self.expression( 1108 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1109 ), 1110 "DEFAULT": lambda self: self.expression( 1111 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1112 ), 1113 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1114 "EPHEMERAL": lambda self: self.expression( 1115 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1116 ), 1117 "EXCLUDE": lambda self: self.expression( 1118 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1119 ), 1120 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1121 "FORMAT": lambda self: self.expression( 1122 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1123 ), 1124 "GENERATED": lambda self: self._parse_generated_as_identity(), 1125 "IDENTITY": lambda self: self._parse_auto_increment(), 1126 "INLINE": lambda self: self._parse_inline(), 1127 "LIKE": lambda self: self._parse_create_like(), 1128 "NOT": lambda self: self._parse_not_constraint(), 1129 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1130 "ON": lambda self: ( 1131 self._match(TokenType.UPDATE) 1132 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1133 ) 1134 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1135 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1136 "PERIOD": lambda self: self._parse_period_for_system_time(), 1137 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1138 "REFERENCES": lambda self: self._parse_references(match=False), 1139 "TITLE": lambda self: self.expression( 1140 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1141 ), 1142 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1143 "UNIQUE": lambda self: self._parse_unique(), 1144 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1145 "WATERMARK": lambda self: self.expression( 1146 exp.WatermarkColumnConstraint, 1147 this=self._match(TokenType.FOR) and self._parse_column(), 1148 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1149 ), 1150 "WITH": lambda self: self.expression( 1151 exp.Properties, expressions=self._parse_wrapped_properties() 1152 ), 1153 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1154 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1155 } 1156 1157 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1158 if not self._match(TokenType.L_PAREN, advance=False): 1159 # Partitioning by bucket or truncate follows the syntax: 1160 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1161 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1162 self._retreat(self._index - 1) 1163 return None 1164 1165 klass = ( 1166 exp.PartitionedByBucket 1167 if self._prev.text.upper() == "BUCKET" 1168 else exp.PartitionByTruncate 1169 ) 1170 1171 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1172 this, expression = seq_get(args, 0), seq_get(args, 1) 1173 1174 if isinstance(this, exp.Literal): 1175 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1176 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1177 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1178 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1179 # 1180 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1181 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1182 this, expression = expression, this 1183 1184 return self.expression(klass, this=this, expression=expression) 1185 1186 ALTER_PARSERS = { 1187 "ADD": lambda self: self._parse_alter_table_add(), 1188 "AS": lambda self: self._parse_select(), 1189 "ALTER": lambda self: self._parse_alter_table_alter(), 1190 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1191 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1192 "DROP": lambda self: self._parse_alter_table_drop(), 1193 "RENAME": lambda self: self._parse_alter_table_rename(), 1194 "SET": lambda self: self._parse_alter_table_set(), 1195 "SWAP": lambda self: self.expression( 1196 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1197 ), 1198 } 1199 1200 ALTER_ALTER_PARSERS = { 1201 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1202 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1203 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1204 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1205 } 1206 1207 SCHEMA_UNNAMED_CONSTRAINTS = { 1208 "CHECK", 1209 "EXCLUDE", 1210 "FOREIGN KEY", 1211 "LIKE", 1212 "PERIOD", 1213 "PRIMARY KEY", 1214 "UNIQUE", 1215 "WATERMARK", 1216 "BUCKET", 1217 "TRUNCATE", 1218 } 1219 1220 NO_PAREN_FUNCTION_PARSERS = { 1221 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1222 "CASE": lambda self: self._parse_case(), 1223 "CONNECT_BY_ROOT": lambda self: self.expression( 1224 exp.ConnectByRoot, this=self._parse_column() 1225 ), 1226 "IF": lambda self: self._parse_if(), 1227 } 1228 1229 INVALID_FUNC_NAME_TOKENS = { 1230 TokenType.IDENTIFIER, 1231 TokenType.STRING, 1232 } 1233 1234 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1235 1236 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1237 1238 FUNCTION_PARSERS = { 1239 **{ 1240 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1241 }, 1242 **{ 1243 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1244 }, 1245 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1246 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1247 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1248 "DECODE": lambda self: self._parse_decode(), 1249 "EXTRACT": lambda self: self._parse_extract(), 1250 
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1251 "GAP_FILL": lambda self: self._parse_gap_fill(), 1252 "JSON_OBJECT": lambda self: self._parse_json_object(), 1253 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1254 "JSON_TABLE": lambda self: self._parse_json_table(), 1255 "MATCH": lambda self: self._parse_match_against(), 1256 "NORMALIZE": lambda self: self._parse_normalize(), 1257 "OPENJSON": lambda self: self._parse_open_json(), 1258 "OVERLAY": lambda self: self._parse_overlay(), 1259 "POSITION": lambda self: self._parse_position(), 1260 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1261 "STRING_AGG": lambda self: self._parse_string_agg(), 1262 "SUBSTRING": lambda self: self._parse_substring(), 1263 "TRIM": lambda self: self._parse_trim(), 1264 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1265 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1266 "XMLELEMENT": lambda self: self.expression( 1267 exp.XMLElement, 1268 this=self._match_text_seq("NAME") and self._parse_id_var(), 1269 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1270 ), 1271 "XMLTABLE": lambda self: self._parse_xml_table(), 1272 } 1273 1274 QUERY_MODIFIER_PARSERS = { 1275 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1276 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1277 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1278 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1279 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1280 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1281 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1282 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1283 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1284 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1285 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1286 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1287 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1288 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1289 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1290 TokenType.CLUSTER_BY: lambda self: ( 1291 "cluster", 1292 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1293 ), 1294 TokenType.DISTRIBUTE_BY: lambda self: ( 1295 "distribute", 1296 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1297 ), 1298 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1299 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1300 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1301 } 1302 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1303 1304 SET_PARSERS = { 1305 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1306 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1307 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1308 "TRANSACTION": lambda self: self._parse_set_transaction(), 1309 } 1310 1311 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1312 1313 TYPE_LITERAL_PARSERS = { 1314 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1315 } 1316 1317 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }
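    # OPTIONS_TYPE tables map a leading keyword to the keyword sequences that may follow
    # it; they are consumed by _parse_var_from_options. For instance, the "ISOLATION"
    # entry above matches runs such as ISOLATION LEVEL READ COMMITTED or
    # ISOLATION LEVEL SERIALIZABLE.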
    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
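    # The boolean flags in this block are overridden by dialect parser subclasses to
    # toggle grammar differences. A minimal sketch (hypothetical dialect):
    #
    #     class MyParser(Parser):
    #         LOG_DEFAULTS_TO_LN = True  # LOG(x) parses as LN(x)
    #         STRING_ALIASES = True      # allow SELECT COUNT(*) 'count'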
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINs without join criteria.
    # Adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0
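    # Typical usage (a sketch; the high-level sqlglot.parse / sqlglot.parse_one
    # helpers wrap this machinery):
    #
    #     from sqlglot.dialects import Dialect
    #
    #     dialect = Dialect.get_or_raise("duckdb")
    #     sql = "SELECT 1; SELECT 2"
    #     trees = dialect.parser().parse(dialect.tokenize(sql), sql)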
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
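    # How errors surface depends on the error_level chosen at construction time
    # (a sketch):
    #
    #     Parser(error_level=ErrorLevel.WARN)       # log errors, keep best-effort trees
    #     Parser(error_level=ErrorLevel.RAISE)      # collect errors, raise in check_errors
    #     Parser(error_level=ErrorLevel.IMMEDIATE)  # raise on the first error (the default)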
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this
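    # Speculative parsing throughout this module follows a save/restore protocol:
    # record self._index, attempt a parse path, and _retreat on failure. A sketch of
    # the idiom (_parse_something is a stand-in for any optional parse method):
    #
    #     index = self._index
    #     this = self._parse_something()
    #     if not this:
    #         self._retreat(index)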
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fall back to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
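    # For reference, a CREATE statement round-trips through _parse_create into an
    # exp.Create node. A sketch using the public helper:
    #
    #     import sqlglot
    #
    #     create = sqlglot.parse_one("CREATE TABLE t (c INT)")
    #     assert isinstance(create, sqlglot.exp.Create)
    #     assert create.args.get("kind") == "TABLE"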
self._match_texts(("MIN", "MINIMUM")), 2230 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2231 } 2232 2233 if self._match_texts(self.PROPERTY_PARSERS): 2234 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2235 try: 2236 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2237 except TypeError: 2238 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2239 2240 return None 2241 2242 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2243 return self._parse_wrapped_csv(self._parse_property) 2244 2245 def _parse_property(self) -> t.Optional[exp.Expression]: 2246 if self._match_texts(self.PROPERTY_PARSERS): 2247 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2248 2249 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2250 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2251 2252 if self._match_text_seq("COMPOUND", "SORTKEY"): 2253 return self._parse_sortkey(compound=True) 2254 2255 if self._match_text_seq("SQL", "SECURITY"): 2256 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2257 2258 index = self._index 2259 2260 seq_props = self._parse_sequence_properties() 2261 if seq_props: 2262 return seq_props 2263 2264 self._retreat(index) 2265 key = self._parse_column() 2266 2267 if not self._match(TokenType.EQ): 2268 self._retreat(index) 2269 return None 2270 2271 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2272 if isinstance(key, exp.Column): 2273 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2274 2275 value = self._parse_bitwise() or self._parse_var(any_token=True) 2276 2277 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2278 if isinstance(value, exp.Column): 2279 value = exp.var(value.name) 2280 2281 return self.expression(exp.Property, this=key, value=value) 2282 2283 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2284 if self._match_text_seq("BY"): 2285 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2286 2287 self._match(TokenType.ALIAS) 2288 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2289 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2290 2291 return self.expression( 2292 exp.FileFormatProperty, 2293 this=( 2294 self.expression( 2295 exp.InputOutputFormat, 2296 input_format=input_format, 2297 output_format=output_format, 2298 ) 2299 if input_format or output_format 2300 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2301 ), 2302 hive_format=True, 2303 ) 2304 2305 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2306 field = self._parse_field() 2307 if isinstance(field, exp.Identifier) and not field.quoted: 2308 field = exp.var(field) 2309 2310 return field 2311 2312 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2313 self._match(TokenType.EQ) 2314 self._match(TokenType.ALIAS) 2315 2316 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2317 2318 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2319 properties = [] 2320 while True: 2321 if before: 2322 prop = self._parse_property_before() 2323 else: 2324 prop = self._parse_property() 2325 if not prop: 2326 break 2327 for p in ensure_list(prop): 2328 properties.append(p) 
    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )
    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
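    # The partition-bound grammar above covers the Postgres forms (a sketch):
    #
    #     CREATE TABLE p1 PARTITION OF parent FOR VALUES FROM (MINVALUE) TO (10)
    #     CREATE TABLE p2 PARTITION OF parent FOR VALUES IN (1, 2, 3)
    #     CREATE TABLE p3 PARTITION OF parent FOR VALUES WITH (MODULUS 4, REMAINDER 0)
    #     CREATE TABLE p4 PARTITION OF parent DEFAULT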
    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()
        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
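
    # Illustrative sketch, not part of the original module: the Hive row-format
    # clause handled by `_parse_row_format` (assumes the `hive` dialect).
    #
    #     >>> import sqlglot
    #     >>> sql = "CREATE TABLE t (x INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','"
    #     >>> ast = sqlglot.parse_one(sql, read="hive")
    #     >>> ast.find(sqlglot.exp.RowFormatDelimitedProperty) is not None
    #     True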
UNCACHE") 3160 3161 return self.expression( 3162 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3163 ) 3164 3165 def _parse_cache(self) -> exp.Cache: 3166 lazy = self._match_text_seq("LAZY") 3167 self._match(TokenType.TABLE) 3168 table = self._parse_table(schema=True) 3169 3170 options = [] 3171 if self._match_text_seq("OPTIONS"): 3172 self._match_l_paren() 3173 k = self._parse_string() 3174 self._match(TokenType.EQ) 3175 v = self._parse_string() 3176 options = [k, v] 3177 self._match_r_paren() 3178 3179 self._match(TokenType.ALIAS) 3180 return self.expression( 3181 exp.Cache, 3182 this=table, 3183 lazy=lazy, 3184 options=options, 3185 expression=self._parse_select(nested=True), 3186 ) 3187 3188 def _parse_partition(self) -> t.Optional[exp.Partition]: 3189 if not self._match_texts(self.PARTITION_KEYWORDS): 3190 return None 3191 3192 return self.expression( 3193 exp.Partition, 3194 subpartition=self._prev.text.upper() == "SUBPARTITION", 3195 expressions=self._parse_wrapped_csv(self._parse_assignment), 3196 ) 3197 3198 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3199 def _parse_value_expression() -> t.Optional[exp.Expression]: 3200 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3201 return exp.var(self._prev.text.upper()) 3202 return self._parse_expression() 3203 3204 if self._match(TokenType.L_PAREN): 3205 expressions = self._parse_csv(_parse_value_expression) 3206 self._match_r_paren() 3207 return self.expression(exp.Tuple, expressions=expressions) 3208 3209 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3210 expression = self._parse_expression() 3211 if expression: 3212 return self.expression(exp.Tuple, expressions=[expression]) 3213 return None 3214 3215 def _parse_projections(self) -> t.List[exp.Expression]: 3216 return self._parse_expressions() 3217 3218 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3219 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3220 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3221 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3222 ) 3223 elif self._match(TokenType.FROM): 3224 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3225 # Support parentheses for duckdb FROM-first syntax 3226 select = self._parse_select(from_=from_) 3227 if select: 3228 if not select.args.get("from"): 3229 select.set("from", from_) 3230 this = select 3231 else: 3232 this = exp.select("*").from_(t.cast(exp.From, from_)) 3233 else: 3234 this = ( 3235 self._parse_table(consume_pipe=True) 3236 if table 3237 else self._parse_select(nested=True, parse_set_operation=False) 3238 ) 3239 3240 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3241 # in case a modifier (e.g. 

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select(from_=from_)
            if select:
                if not select.args.get("from"):
                    select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

        # Transform exp.Values into an exp.Table so it passes through
        # _parse_query_modifiers, in case a modifier (e.g. a join) follows
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
        from_: t.Optional[exp.From] = None,
    ) -> t.Optional[exp.Expression]:
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if consume_pipe and self._match(TokenType.PIPE_GT, advance=False):
            if not query and from_:
                query = exp.select("*").from_(from_)
            if isinstance(query, exp.Query):
                query = self._parse_pipe_syntax_query(query)
                query = query.subquery(copy=False) if query and table else query

        return query
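
    # Illustrative sketch, not part of the original module: DuckDB's FROM-first
    # syntax, which the FROM branch above normalizes into a plain SELECT.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.transpile("FROM tbl", read="duckdb", write="duckdb")[0]
    #     'SELECT * FROM tbl'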

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )
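
    # Illustrative sketch, not part of the original module: a leading WITH
    # clause is parsed first and attached to the statement that follows it.
    #
    #     >>> import sqlglot
    #     >>> ast = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
    #     >>> ast.args["with"].expressions[0].alias
    #     'x'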

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias
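
    # Illustrative sketch, not part of the original module: a table alias with
    # a column list, as produced by `_parse_table_alias`.
    #
    #     >>> import sqlglot
    #     >>> ast = sqlglot.parse_one("SELECT * FROM (VALUES (1, 2)) AS t(a, b)")
    #     >>> [col.name for col in ast.find(sqlglot.exp.TableAlias).columns]
    #     ['a', 'b']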
expression.set("expressions", None) 3581 offset.set("expressions", limit_by_expressions) 3582 continue 3583 break 3584 3585 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3586 this = self._implicit_unnests_to_explicit(this) 3587 3588 return this 3589 3590 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3591 start = self._curr 3592 while self._curr: 3593 self._advance() 3594 3595 end = self._tokens[self._index - 1] 3596 return exp.Hint(expressions=[self._find_sql(start, end)]) 3597 3598 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3599 return self._parse_function_call() 3600 3601 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3602 start_index = self._index 3603 should_fallback_to_string = False 3604 3605 hints = [] 3606 try: 3607 for hint in iter( 3608 lambda: self._parse_csv( 3609 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3610 ), 3611 [], 3612 ): 3613 hints.extend(hint) 3614 except ParseError: 3615 should_fallback_to_string = True 3616 3617 if should_fallback_to_string or self._curr: 3618 self._retreat(start_index) 3619 return self._parse_hint_fallback_to_string() 3620 3621 return self.expression(exp.Hint, expressions=hints) 3622 3623 def _parse_hint(self) -> t.Optional[exp.Hint]: 3624 if self._match(TokenType.HINT) and self._prev_comments: 3625 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3626 3627 return None 3628 3629 def _parse_into(self) -> t.Optional[exp.Into]: 3630 if not self._match(TokenType.INTO): 3631 return None 3632 3633 temp = self._match(TokenType.TEMPORARY) 3634 unlogged = self._match_text_seq("UNLOGGED") 3635 self._match(TokenType.TABLE) 3636 3637 return self.expression( 3638 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3639 ) 3640 3641 def _parse_from( 3642 self, 3643 joins: bool = False, 3644 skip_from_token: bool = False, 3645 consume_pipe: bool = False, 3646 ) -> t.Optional[exp.From]: 3647 if not skip_from_token and not self._match(TokenType.FROM): 3648 return None 3649 3650 return self.expression( 3651 exp.From, 3652 comments=self._prev_comments, 3653 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3654 ) 3655 3656 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3657 return self.expression( 3658 exp.MatchRecognizeMeasure, 3659 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3660 this=self._parse_expression(), 3661 ) 3662 3663 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3664 if not self._match(TokenType.MATCH_RECOGNIZE): 3665 return None 3666 3667 self._match_l_paren() 3668 3669 partition = self._parse_partition_by() 3670 order = self._parse_order() 3671 3672 measures = ( 3673 self._parse_csv(self._parse_match_recognize_measure) 3674 if self._match_text_seq("MEASURES") 3675 else None 3676 ) 3677 3678 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3679 rows = exp.var("ONE ROW PER MATCH") 3680 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3681 text = "ALL ROWS PER MATCH" 3682 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3683 text += " SHOW EMPTY MATCHES" 3684 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3685 text += " OMIT EMPTY MATCHES" 3686 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3687 text += " WITH UNMATCHED ROWS" 3688 rows = exp.var(text) 3689 else: 3690 rows = None 3691 3692 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3693 text = "AFTER 
MATCH SKIP" 3694 if self._match_text_seq("PAST", "LAST", "ROW"): 3695 text += " PAST LAST ROW" 3696 elif self._match_text_seq("TO", "NEXT", "ROW"): 3697 text += " TO NEXT ROW" 3698 elif self._match_text_seq("TO", "FIRST"): 3699 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3700 elif self._match_text_seq("TO", "LAST"): 3701 text += f" TO LAST {self._advance_any().text}" # type: ignore 3702 after = exp.var(text) 3703 else: 3704 after = None 3705 3706 if self._match_text_seq("PATTERN"): 3707 self._match_l_paren() 3708 3709 if not self._curr: 3710 self.raise_error("Expecting )", self._curr) 3711 3712 paren = 1 3713 start = self._curr 3714 3715 while self._curr and paren > 0: 3716 if self._curr.token_type == TokenType.L_PAREN: 3717 paren += 1 3718 if self._curr.token_type == TokenType.R_PAREN: 3719 paren -= 1 3720 3721 end = self._prev 3722 self._advance() 3723 3724 if paren > 0: 3725 self.raise_error("Expecting )", self._curr) 3726 3727 pattern = exp.var(self._find_sql(start, end)) 3728 else: 3729 pattern = None 3730 3731 define = ( 3732 self._parse_csv(self._parse_name_as_expression) 3733 if self._match_text_seq("DEFINE") 3734 else None 3735 ) 3736 3737 self._match_r_paren() 3738 3739 return self.expression( 3740 exp.MatchRecognize, 3741 partition_by=partition, 3742 order=order, 3743 measures=measures, 3744 rows=rows, 3745 after=after, 3746 pattern=pattern, 3747 define=define, 3748 alias=self._parse_table_alias(), 3749 ) 3750 3751 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3752 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3753 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3754 cross_apply = False 3755 3756 if cross_apply is not None: 3757 this = self._parse_select(table=True) 3758 view = None 3759 outer = None 3760 elif self._match(TokenType.LATERAL): 3761 this = self._parse_select(table=True) 3762 view = self._match(TokenType.VIEW) 3763 outer = self._match(TokenType.OUTER) 3764 else: 3765 return None 3766 3767 if not this: 3768 this = ( 3769 self._parse_unnest() 3770 or self._parse_function() 3771 or self._parse_id_var(any_token=False) 3772 ) 3773 3774 while self._match(TokenType.DOT): 3775 this = exp.Dot( 3776 this=this, 3777 expression=self._parse_function() or self._parse_id_var(any_token=False), 3778 ) 3779 3780 ordinality: t.Optional[bool] = None 3781 3782 if view: 3783 table = self._parse_id_var(any_token=False) 3784 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3785 table_alias: t.Optional[exp.TableAlias] = self.expression( 3786 exp.TableAlias, this=table, columns=columns 3787 ) 3788 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3789 # We move the alias from the lateral's child node to the lateral itself 3790 table_alias = this.args["alias"].pop() 3791 else: 3792 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3793 table_alias = self._parse_table_alias() 3794 3795 return self.expression( 3796 exp.Lateral, 3797 this=this, 3798 view=view, 3799 outer=outer, 3800 alias=table_alias, 3801 cross_apply=cross_apply, 3802 ordinality=ordinality, 3803 ) 3804 3805 def _parse_join_parts( 3806 self, 3807 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3808 return ( 3809 self._match_set(self.JOIN_METHODS) and self._prev, 3810 self._match_set(self.JOIN_SIDES) and self._prev, 3811 self._match_set(self.JOIN_KINDS) and self._prev, 3812 ) 3813 3814 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3815 def 

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments

        if (
            self.ADD_JOIN_ON_TRUE
            and not kwargs.get("on")
            and not kwargs.get("using")
            and not kwargs.get("method")
            and kwargs.get("kind") in (None, "INNER", "OUTER")
        ):
            kwargs["on"] = exp.true()

        return self.expression(exp.Join, comments=comments, **kwargs)
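
    # Illustrative sketch, not part of the original module: JOIN ... USING
    # columns are stored as bare identifiers by `_parse_using_identifiers`.
    #
    #     >>> import sqlglot
    #     >>> join = sqlglot.parse_one("SELECT * FROM a JOIN b USING (id)").args["joins"][0]
    #     >>> [ident.name for ident in join.args["using"]]
    #     ['id']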

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )
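
    # Illustrative sketch, not part of the original module: index parameters
    # such as USING and a partial-index WHERE, collected by `_parse_index_params`
    # (assumes the `postgres` dialect).
    #
    #     >>> import sqlglot
    #     >>> sql = "CREATE INDEX idx ON t USING btree (x) WHERE x > 0"
    #     >>> params = sqlglot.parse_one(sql, read="postgres").find(sqlglot.exp.IndexParameters)
    #     >>> params.args["using"].name, params.args["where"] is not None
    #     ('btree', True)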

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
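
    # Illustrative sketch, not part of the original module: dotted names
    # decompose into catalog/db/this on the resulting exp.Table.
    #
    #     >>> import sqlglot
    #     >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(sqlglot.exp.Table)
    #     >>> tbl.catalog, tbl.db, tbl.name
    #     ('c', 'd', 't')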

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
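
    # Illustrative sketch, not part of the original module: time-travel syntax
    # captured by `_parse_version` (assumes the `spark` dialect's VERSION AS OF).
    #
    #     >>> import sqlglot
    #     >>> sql = "SELECT * FROM t VERSION AS OF 1"
    #     >>> sqlglot.parse_one(sql, read="spark").find(sqlglot.exp.Version).args["kind"]
    #     'AS OF'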
self.raise_error("Unexpected extra column alias in unnest.") 4265 4266 alias.set("columns", [alias.this]) 4267 alias.set("this", None) 4268 4269 columns = alias.args.get("columns") or [] 4270 if offset and len(expressions) < len(columns): 4271 offset = columns.pop() 4272 4273 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4274 self._match(TokenType.ALIAS) 4275 offset = self._parse_id_var( 4276 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4277 ) or exp.to_identifier("offset") 4278 4279 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4280 4281 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4282 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4283 if not is_derived and not ( 4284 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4285 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4286 ): 4287 return None 4288 4289 expressions = self._parse_csv(self._parse_value) 4290 alias = self._parse_table_alias() 4291 4292 if is_derived: 4293 self._match_r_paren() 4294 4295 return self.expression( 4296 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4297 ) 4298 4299 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4300 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4301 as_modifier and self._match_text_seq("USING", "SAMPLE") 4302 ): 4303 return None 4304 4305 bucket_numerator = None 4306 bucket_denominator = None 4307 bucket_field = None 4308 percent = None 4309 size = None 4310 seed = None 4311 4312 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4313 matched_l_paren = self._match(TokenType.L_PAREN) 4314 4315 if self.TABLESAMPLE_CSV: 4316 num = None 4317 expressions = self._parse_csv(self._parse_primary) 4318 else: 4319 expressions = None 4320 num = ( 4321 self._parse_factor() 4322 if self._match(TokenType.NUMBER, advance=False) 4323 else self._parse_primary() or self._parse_placeholder() 4324 ) 4325 4326 if self._match_text_seq("BUCKET"): 4327 bucket_numerator = self._parse_number() 4328 self._match_text_seq("OUT", "OF") 4329 bucket_denominator = bucket_denominator = self._parse_number() 4330 self._match(TokenType.ON) 4331 bucket_field = self._parse_field() 4332 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4333 percent = num 4334 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4335 size = num 4336 else: 4337 percent = num 4338 4339 if matched_l_paren: 4340 self._match_r_paren() 4341 4342 if self._match(TokenType.L_PAREN): 4343 method = self._parse_var(upper=True) 4344 seed = self._match(TokenType.COMMA) and self._parse_number() 4345 self._match_r_paren() 4346 elif self._match_texts(("SEED", "REPEATABLE")): 4347 seed = self._parse_wrapped(self._parse_number) 4348 4349 if not method and self.DEFAULT_SAMPLING_METHOD: 4350 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4351 4352 return self.expression( 4353 exp.TableSample, 4354 expressions=expressions, 4355 method=method, 4356 bucket_numerator=bucket_numerator, 4357 bucket_denominator=bucket_denominator, 4358 bucket_field=bucket_field, 4359 percent=percent, 4360 size=size, 4361 seed=seed, 4362 ) 4363 4364 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4365 return list(iter(self._parse_pivot, None)) or None 4366 4367 def _parse_joins(self) -> t.Iterator[exp.Join]: 4368 return iter(self._parse_join, None) 4369 4370 def _parse_unpivot_columns(self) -> 

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )
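
    # Illustrative sketch, not part of the original module: DuckDB's simplified
    # PIVOT statement handled by `_parse_simplified_pivot`.
    #
    #     >>> import sqlglot
    #     >>> sql = "PIVOT cities ON year USING SUM(population)"
    #     >>> sqlglot.parse_one(sql, read="duckdb").find(sqlglot.exp.Pivot) is not None
    #     True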

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        func = self._parse_function()
        if not func:
            if self._prev and self._prev.token_type == TokenType.COMMA:
                return None
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot
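
    # Illustrative sketch, not part of the original module: for a non-UNPIVOT
    # pivot, output column names are the product of the FOR ... IN values and
    # the aggregation aliases, joined with underscores (default dialect shown).
    #
    #     >>> import sqlglot
    #     >>> sql = "SELECT * FROM t PIVOT(SUM(v) AS total FOR y IN (2000, 2010))"
    #     >>> pivot = sqlglot.parse_one(sql).find(sqlglot.exp.Pivot)
    #     >>> [col.name for col in pivot.args["columns"]]
    #     ['2000_total', '2010_total']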

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )
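
    # Illustrative sketch, not part of the original module: GROUP BY ROLLUP
    # lands in the `rollup` element collected by `_parse_group`.
    #
    #     >>> import sqlglot
    #     >>> ast = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)")
    #     >>> type(ast.args["group"].args["rollup"][0]).__name__
    #     'Rollup'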

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_bitwise)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
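
    # Illustrative sketch, not part of the original module: explicit direction
    # and null ordering as recorded by `_parse_ordered`.
    #
    #     >>> import sqlglot
    #     >>> sql = "SELECT x FROM t ORDER BY x DESC NULLS LAST"
    #     >>> ordered = sqlglot.parse_one(sql).find(sqlglot.exp.Ordered)
    #     >>> ordered.args["desc"], ordered.args["nulls_first"]
    #     (True, False)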
"FIRST" 4801 4802 count = self._parse_field(tokens=self.FETCH_TOKENS) 4803 4804 return self.expression( 4805 exp.Fetch, 4806 direction=direction, 4807 count=count, 4808 limit_options=self._parse_limit_options(), 4809 ) 4810 4811 return this 4812 4813 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4814 if not self._match(TokenType.OFFSET): 4815 return this 4816 4817 count = self._parse_term() 4818 self._match_set((TokenType.ROW, TokenType.ROWS)) 4819 4820 return self.expression( 4821 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4822 ) 4823 4824 def _can_parse_limit_or_offset(self) -> bool: 4825 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4826 return False 4827 4828 index = self._index 4829 result = bool( 4830 self._try_parse(self._parse_limit, retreat=True) 4831 or self._try_parse(self._parse_offset, retreat=True) 4832 ) 4833 self._retreat(index) 4834 return result 4835 4836 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4837 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4838 4839 def _parse_locks(self) -> t.List[exp.Lock]: 4840 locks = [] 4841 while True: 4842 update, key = None, None 4843 if self._match_text_seq("FOR", "UPDATE"): 4844 update = True 4845 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4846 "LOCK", "IN", "SHARE", "MODE" 4847 ): 4848 update = False 4849 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4850 update, key = False, True 4851 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4852 update, key = True, True 4853 else: 4854 break 4855 4856 expressions = None 4857 if self._match_text_seq("OF"): 4858 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4859 4860 wait: t.Optional[bool | exp.Expression] = None 4861 if self._match_text_seq("NOWAIT"): 4862 wait = True 4863 elif self._match_text_seq("WAIT"): 4864 wait = self._parse_primary() 4865 elif self._match_text_seq("SKIP", "LOCKED"): 4866 wait = False 4867 4868 locks.append( 4869 self.expression( 4870 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4871 ) 4872 ) 4873 4874 return locks 4875 4876 def parse_set_operation( 4877 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4878 ) -> t.Optional[exp.Expression]: 4879 start = self._index 4880 _, side_token, kind_token = self._parse_join_parts() 4881 4882 side = side_token.text if side_token else None 4883 kind = kind_token.text if kind_token else None 4884 4885 if not self._match_set(self.SET_OPERATIONS): 4886 self._retreat(start) 4887 return None 4888 4889 token_type = self._prev.token_type 4890 4891 if token_type == TokenType.UNION: 4892 operation: t.Type[exp.SetOperation] = exp.Union 4893 elif token_type == TokenType.EXCEPT: 4894 operation = exp.Except 4895 else: 4896 operation = exp.Intersect 4897 4898 comments = self._prev.comments 4899 4900 if self._match(TokenType.DISTINCT): 4901 distinct: t.Optional[bool] = True 4902 elif self._match(TokenType.ALL): 4903 distinct = False 4904 else: 4905 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4906 if distinct is None: 4907 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4908 4909 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4910 "STRICT", "CORRESPONDING" 4911 ) 4912 if self._match_text_seq("CORRESPONDING"): 4913 by_name = True 4914 if not side and not kind: 4915 kind = "INNER" 4916 4917 on_column_list = None 4918 if by_name and 
self._match_texts(("ON", "BY")): 4919 on_column_list = self._parse_wrapped_csv(self._parse_column) 4920 4921 expression = self._parse_select( 4922 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4923 ) 4924 4925 return self.expression( 4926 operation, 4927 comments=comments, 4928 this=this, 4929 distinct=distinct, 4930 by_name=by_name, 4931 expression=expression, 4932 side=side, 4933 kind=kind, 4934 on=on_column_list, 4935 ) 4936 4937 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4938 while this: 4939 setop = self.parse_set_operation(this) 4940 if not setop: 4941 break 4942 this = setop 4943 4944 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4945 expression = this.expression 4946 4947 if expression: 4948 for arg in self.SET_OP_MODIFIERS: 4949 expr = expression.args.get(arg) 4950 if expr: 4951 this.set(arg, expr.pop()) 4952 4953 return this 4954 4955 def _parse_expression(self) -> t.Optional[exp.Expression]: 4956 return self._parse_alias(self._parse_assignment()) 4957 4958 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4959 this = self._parse_disjunction() 4960 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4961 # This allows us to parse <non-identifier token> := <expr> 4962 this = exp.column( 4963 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4964 ) 4965 4966 while self._match_set(self.ASSIGNMENT): 4967 if isinstance(this, exp.Column) and len(this.parts) == 1: 4968 this = this.this 4969 4970 this = self.expression( 4971 self.ASSIGNMENT[self._prev.token_type], 4972 this=this, 4973 comments=self._prev_comments, 4974 expression=self._parse_assignment(), 4975 ) 4976 4977 return this 4978 4979 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4980 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4981 4982 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4983 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4984 4985 def _parse_equality(self) -> t.Optional[exp.Expression]: 4986 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4987 4988 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4989 return self._parse_tokens(self._parse_range, self.COMPARISON) 4990 4991 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4992 this = this or self._parse_bitwise() 4993 negate = self._match(TokenType.NOT) 4994 4995 if self._match_set(self.RANGE_PARSERS): 4996 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4997 if not expression: 4998 return this 4999 5000 this = expression 5001 elif self._match(TokenType.ISNULL): 5002 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5003 5004 # Postgres supports ISNULL and NOTNULL for conditions. 
5005 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5006 if self._match(TokenType.NOTNULL): 5007 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5008 this = self.expression(exp.Not, this=this) 5009 5010 if negate: 5011 this = self._negate_range(this) 5012 5013 if self._match(TokenType.IS): 5014 this = self._parse_is(this) 5015 5016 return this 5017 5018 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5019 if not this: 5020 return this 5021 5022 return self.expression(exp.Not, this=this) 5023 5024 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5025 index = self._index - 1 5026 negate = self._match(TokenType.NOT) 5027 5028 if self._match_text_seq("DISTINCT", "FROM"): 5029 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5030 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5031 5032 if self._match(TokenType.JSON): 5033 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5034 5035 if self._match_text_seq("WITH"): 5036 _with = True 5037 elif self._match_text_seq("WITHOUT"): 5038 _with = False 5039 else: 5040 _with = None 5041 5042 unique = self._match(TokenType.UNIQUE) 5043 self._match_text_seq("KEYS") 5044 expression: t.Optional[exp.Expression] = self.expression( 5045 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5046 ) 5047 else: 5048 expression = self._parse_primary() or self._parse_null() 5049 if not expression: 5050 self._retreat(index) 5051 return None 5052 5053 this = self.expression(exp.Is, this=this, expression=expression) 5054 return self.expression(exp.Not, this=this) if negate else this 5055 5056 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5057 unnest = self._parse_unnest(with_alias=False) 5058 if unnest: 5059 this = self.expression(exp.In, this=this, unnest=unnest) 5060 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5061 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5062 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5063 5064 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5065 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5066 else: 5067 this = self.expression(exp.In, this=this, expressions=expressions) 5068 5069 if matched_l_paren: 5070 self._match_r_paren(this) 5071 elif not self._match(TokenType.R_BRACKET, expression=this): 5072 self.raise_error("Expecting ]") 5073 else: 5074 this = self.expression(exp.In, this=this, field=self._parse_column()) 5075 5076 return this 5077 5078 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5079 symmetric = None 5080 if self._match_text_seq("SYMMETRIC"): 5081 symmetric = True 5082 elif self._match_text_seq("ASYMMETRIC"): 5083 symmetric = False 5084 5085 low = self._parse_bitwise() 5086 self._match(TokenType.AND) 5087 high = self._parse_bitwise() 5088 5089 return self.expression( 5090 exp.Between, 5091 this=this, 5092 low=low, 5093 high=high, 5094 symmetric=symmetric, 5095 ) 5096 5097 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5098 if not self._match(TokenType.ESCAPE): 5099 return this 5100 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5101 5102 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5103 index = self._index 5104 5105 if not 
self._match(TokenType.INTERVAL) and match_interval: 5106 return None 5107 5108 if self._match(TokenType.STRING, advance=False): 5109 this = self._parse_primary() 5110 else: 5111 this = self._parse_term() 5112 5113 if not this or ( 5114 isinstance(this, exp.Column) 5115 and not this.table 5116 and not this.this.quoted 5117 and self._curr 5118 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5119 ): 5120 self._retreat(index) 5121 return None 5122 5123 # handle day-time format interval span with omitted units: 5124 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5125 interval_span_units_omitted = None 5126 if ( 5127 this 5128 and this.is_string 5129 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5130 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5131 ): 5132 index = self._index 5133 5134 # Var "TO" Var 5135 first_unit = self._parse_var(any_token=True, upper=True) 5136 second_unit = None 5137 if first_unit and self._match_text_seq("TO"): 5138 second_unit = self._parse_var(any_token=True, upper=True) 5139 5140 interval_span_units_omitted = not (first_unit and second_unit) 5141 5142 self._retreat(index) 5143 5144 unit = ( 5145 None 5146 if interval_span_units_omitted 5147 else ( 5148 self._parse_function() 5149 or ( 5150 not self._match(TokenType.ALIAS, advance=False) 5151 and self._parse_var(any_token=True, upper=True) 5152 ) 5153 ) 5154 ) 5155 5156 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5157 # each INTERVAL expression into this canonical form so it's easy to transpile 5158 if this and this.is_number: 5159 this = exp.Literal.string(this.to_py()) 5160 elif this and this.is_string: 5161 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5162 if parts and unit: 5163 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5164 unit = None 5165 self._retreat(self._index - 1) 5166 5167 if len(parts) == 1: 5168 this = exp.Literal.string(parts[0][0]) 5169 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5170 5171 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5172 unit = self.expression( 5173 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5174 ) 5175 5176 interval = self.expression(exp.Interval, this=this, unit=unit) 5177 5178 index = self._index 5179 self._match(TokenType.PLUS) 5180 5181 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5182 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5183 return self.expression( 5184 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5185 ) 5186 5187 self._retreat(index) 5188 return interval 5189 5190 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5191 this = self._parse_term() 5192 5193 while True: 5194 if self._match_set(self.BITWISE): 5195 this = self.expression( 5196 self.BITWISE[self._prev.token_type], 5197 this=this, 5198 expression=self._parse_term(), 5199 ) 5200 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5201 this = self.expression( 5202 exp.DPipe, 5203 this=this, 5204 expression=self._parse_term(), 5205 safe=not self.dialect.STRICT_STRING_CONCAT, 5206 ) 5207 elif self._match(TokenType.DQMARK): 5208 this = self.expression( 5209 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5210 ) 5211 elif self._match_pair(TokenType.LT, TokenType.LT): 5212 this = self.expression( 5213 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5214 ) 5215 elif self._match_pair(TokenType.GT, TokenType.GT): 5216 this = self.expression( 5217 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5218 ) 5219 else: 5220 break 5221 5222 return this 5223 5224 def _parse_term(self) -> t.Optional[exp.Expression]: 5225 this = self._parse_factor() 5226 5227 while self._match_set(self.TERM): 5228 klass = self.TERM[self._prev.token_type] 5229 comments = self._prev_comments 5230 expression = self._parse_factor() 5231 5232 this = self.expression(klass, this=this, comments=comments, expression=expression) 5233 5234 if isinstance(this, exp.Collate): 5235 expr = this.expression 5236 5237 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5238 # fallback to Identifier / Var 5239 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5240 ident = expr.this 5241 if isinstance(ident, exp.Identifier): 5242 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5243 5244 return this 5245 5246 def _parse_factor(self) -> t.Optional[exp.Expression]: 5247 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5248 this = parse_method() 5249 5250 while self._match_set(self.FACTOR): 5251 klass = self.FACTOR[self._prev.token_type] 5252 comments = self._prev_comments 5253 expression = parse_method() 5254 5255 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5256 self._retreat(self._index - 1) 5257 return this 5258 5259 this = self.expression(klass, this=this, comments=comments, expression=expression) 5260 5261 if isinstance(this, exp.Div): 5262 this.args["typed"] = self.dialect.TYPED_DIVISION 5263 this.args["safe"] = self.dialect.SAFE_DIVISION 5264 5265 return this 5266 5267 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5268 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5269 5270 def _parse_unary(self) -> t.Optional[exp.Expression]: 5271 if self._match_set(self.UNARY_PARSERS): 5272 return self.UNARY_PARSERS[self._prev.token_type](self) 5273 return self._parse_at_time_zone(self._parse_type()) 5274 5275 def _parse_type( 5276 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5277 ) -> t.Optional[exp.Expression]: 5278 interval = parse_interval and self._parse_interval() 5279 if interval: 5280 return interval 5281 5282 index = self._index 5283 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5284 
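# [Editor's note: illustrative sketch, not part of the upstream source]
# The chain _parse_bitwise -> _parse_term -> _parse_factor -> _parse_unary above is a
# precedence-climbing parser: each level consumes only its own operators and delegates
# tighter-binding ones downward, so lower-precedence operators sit closer to the AST root.
# A doctest-style probe of the observable behavior via the public API (assuming the
# default dialect; parse_one and exp are sqlglot's documented entry points):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> tree = sqlglot.parse_one("SELECT a + b * c FROM t")
#     >>> add = tree.find(exp.Add)
#     >>> isinstance(add.expression, exp.Mul)  # * binds tighter than +
#     True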
5285 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5286 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5287 if isinstance(data_type, exp.Cast): 5288 # This constructor can contain ops directly after it, for instance struct unnesting: 5289 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5290 return self._parse_column_ops(data_type) 5291 5292 if data_type: 5293 index2 = self._index 5294 this = self._parse_primary() 5295 5296 if isinstance(this, exp.Literal): 5297 literal = this.name 5298 this = self._parse_column_ops(this) 5299 5300 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5301 if parser: 5302 return parser(self, this, data_type) 5303 5304 if ( 5305 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5306 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5307 and TIME_ZONE_RE.search(literal) 5308 ): 5309 data_type = exp.DataType.build("TIMESTAMPTZ") 5310 5311 return self.expression(exp.Cast, this=this, to=data_type) 5312 5313 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5314 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5315 # 5316 # If the index difference here is greater than 1, that means the parser itself must have 5317 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5318 # 5319 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5320 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5321 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5322 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5323 # 5324 # In these cases, we don't really want to return the converted type, but instead retreat 5325 # and try to parse a Column or Identifier in the section below.
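# [Editor's note: illustrative sketch, not part of the upstream source]
# Concretely: the expressions arg is populated by token consumption when the scale and
# precision are physically present, as in DECIMAL(38, 0), making the index delta exceed 1;
# a TYPE_CONVERTERS callable that pads a bare DECIMAL instead leaves the delta at 1, so the
# code below retreats and re-parses a Column or Identifier. A hedged probe showing that
# explicitly written type parameters survive into the AST:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> tree = sqlglot.parse_one("SELECT CAST(x AS DECIMAL(38, 0)) FROM t")
#     >>> tree.find(exp.DataType).sql()
#     'DECIMAL(38, 0)'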
5326 if data_type.expressions and index2 - index > 1: 5327 self._retreat(index2) 5328 return self._parse_column_ops(data_type) 5329 5330 self._retreat(index) 5331 5332 if fallback_to_identifier: 5333 return self._parse_id_var() 5334 5335 this = self._parse_column() 5336 return this and self._parse_column_ops(this) 5337 5338 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5339 this = self._parse_type() 5340 if not this: 5341 return None 5342 5343 if isinstance(this, exp.Column) and not this.table: 5344 this = exp.var(this.name.upper()) 5345 5346 return self.expression( 5347 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5348 ) 5349 5350 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5351 type_name = identifier.name 5352 5353 while self._match(TokenType.DOT): 5354 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5355 5356 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5357 5358 def _parse_types( 5359 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5360 ) -> t.Optional[exp.Expression]: 5361 index = self._index 5362 5363 this: t.Optional[exp.Expression] = None 5364 prefix = self._match_text_seq("SYSUDTLIB", ".") 5365 5366 if self._match_set(self.TYPE_TOKENS): 5367 type_token = self._prev.token_type 5368 else: 5369 type_token = None 5370 identifier = allow_identifiers and self._parse_id_var( 5371 any_token=False, tokens=(TokenType.VAR,) 5372 ) 5373 if isinstance(identifier, exp.Identifier): 5374 try: 5375 tokens = self.dialect.tokenize(identifier.name) 5376 except TokenError: 5377 tokens = None 5378 5379 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5380 type_token = tokens[0].token_type 5381 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5382 this = self._parse_user_defined_type(identifier) 5383 else: 5384 self._retreat(self._index - 1) 5385 return None 5386 else: 5387 return None 5388 5389 if type_token == TokenType.PSEUDO_TYPE: 5390 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5391 5392 if type_token == TokenType.OBJECT_IDENTIFIER: 5393 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5394 5395 # https://materialize.com/docs/sql/types/map/ 5396 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5397 key_type = self._parse_types( 5398 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5399 ) 5400 if not self._match(TokenType.FARROW): 5401 self._retreat(index) 5402 return None 5403 5404 value_type = self._parse_types( 5405 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5406 ) 5407 if not self._match(TokenType.R_BRACKET): 5408 self._retreat(index) 5409 return None 5410 5411 return exp.DataType( 5412 this=exp.DataType.Type.MAP, 5413 expressions=[key_type, value_type], 5414 nested=True, 5415 prefix=prefix, 5416 ) 5417 5418 nested = type_token in self.NESTED_TYPE_TOKENS 5419 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5420 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5421 expressions = None 5422 maybe_func = False 5423 5424 if self._match(TokenType.L_PAREN): 5425 if is_struct: 5426 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5427 elif nested: 5428 expressions = self._parse_csv( 5429 lambda: self._parse_types( 5430 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5431 ) 5432 ) 5433 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5434 this = expressions[0] 5435 this.set("nullable", True) 5436 self._match_r_paren() 5437 return this 5438 elif type_token in self.ENUM_TYPE_TOKENS: 5439 expressions = self._parse_csv(self._parse_equality) 5440 elif is_aggregate: 5441 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5442 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5443 ) 5444 if not func_or_ident: 5445 return None 5446 expressions = [func_or_ident] 5447 if self._match(TokenType.COMMA): 5448 expressions.extend( 5449 self._parse_csv( 5450 lambda: self._parse_types( 5451 check_func=check_func, 5452 schema=schema, 5453 allow_identifiers=allow_identifiers, 5454 ) 5455 ) 5456 ) 5457 else: 5458 expressions = self._parse_csv(self._parse_type_size) 5459 5460 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5461 if type_token == TokenType.VECTOR and len(expressions) == 2: 5462 expressions = self._parse_vector_expressions(expressions) 5463 5464 if not self._match(TokenType.R_PAREN): 5465 self._retreat(index) 5466 return None 5467 5468 maybe_func = True 5469 5470 values: t.Optional[t.List[exp.Expression]] = None 5471 5472 if nested and self._match(TokenType.LT): 5473 if is_struct: 5474 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5475 else: 5476 expressions = self._parse_csv( 5477 lambda: self._parse_types( 5478 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5479 ) 5480 ) 5481 5482 if not self._match(TokenType.GT): 5483 self.raise_error("Expecting >") 5484 5485 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5486 values = self._parse_csv(self._parse_assignment) 5487 if not values and is_struct: 5488 values = None 5489 self._retreat(self._index - 1) 5490 else: 5491 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5492 5493 if type_token in self.TIMESTAMPS: 5494 if self._match_text_seq("WITH", "TIME", "ZONE"): 5495 maybe_func = False 5496 tz_type = ( 5497 exp.DataType.Type.TIMETZ 5498 if type_token in self.TIMES 5499 else exp.DataType.Type.TIMESTAMPTZ 5500 ) 5501 this = exp.DataType(this=tz_type, expressions=expressions) 5502 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5503 maybe_func = False 5504 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5505 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5506 maybe_func = False 5507 elif type_token == TokenType.INTERVAL: 5508 unit = self._parse_var(upper=True) 5509 if unit: 5510 if self._match_text_seq("TO"): 5511 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5512 5513 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5514 else: 5515 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5516 elif type_token == TokenType.VOID: 5517 this = exp.DataType(this=exp.DataType.Type.NULL) 5518 5519 if maybe_func and check_func: 5520 index2 = self._index 5521 peek = self._parse_string() 5522 5523 if not peek: 5524 self._retreat(index) 5525 return None 5526 5527 self._retreat(index2) 5528 5529 if not this: 5530 if self._match_text_seq("UNSIGNED"): 5531 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5532 if not unsigned_type_token: 5533 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5534 5535 type_token = unsigned_type_token or type_token 5536 5537 # NULLABLE without parentheses can be a column (Presto/Trino) 5538 if type_token == 
TokenType.NULLABLE and not expressions: 5539 self._retreat(index) 5540 return None 5541 5542 this = exp.DataType( 5543 this=exp.DataType.Type[type_token.value], 5544 expressions=expressions, 5545 nested=nested, 5546 prefix=prefix, 5547 ) 5548 5549 # Empty arrays/structs are allowed 5550 if values is not None: 5551 cls = exp.Struct if is_struct else exp.Array 5552 this = exp.cast(cls(expressions=values), this, copy=False) 5553 5554 elif expressions: 5555 this.set("expressions", expressions) 5556 5557 # https://materialize.com/docs/sql/types/list/#type-name 5558 while self._match(TokenType.LIST): 5559 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5560 5561 index = self._index 5562 5563 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5564 matched_array = self._match(TokenType.ARRAY) 5565 5566 while self._curr: 5567 datatype_token = self._prev.token_type 5568 matched_l_bracket = self._match(TokenType.L_BRACKET) 5569 5570 if (not matched_l_bracket and not matched_array) or ( 5571 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5572 ): 5573 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5574 # not to be confused with the fixed size array parsing 5575 break 5576 5577 matched_array = False 5578 values = self._parse_csv(self._parse_assignment) or None 5579 if ( 5580 values 5581 and not schema 5582 and ( 5583 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5584 ) 5585 ): 5586 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5587 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5588 self._retreat(index) 5589 break 5590 5591 this = exp.DataType( 5592 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5593 ) 5594 self._match(TokenType.R_BRACKET) 5595 5596 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5597 converter = self.TYPE_CONVERTERS.get(this.this) 5598 if converter: 5599 this = converter(t.cast(exp.DataType, this)) 5600 5601 return this 5602 5603 def _parse_vector_expressions( 5604 self, expressions: t.List[exp.Expression] 5605 ) -> t.List[exp.Expression]: 5606 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5607 5608 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5609 index = self._index 5610 5611 if ( 5612 self._curr 5613 and self._next 5614 and self._curr.token_type in self.TYPE_TOKENS 5615 and self._next.token_type in self.TYPE_TOKENS 5616 ): 5617 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5618 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5619 this = self._parse_id_var() 5620 else: 5621 this = ( 5622 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5623 or self._parse_id_var() 5624 ) 5625 5626 self._match(TokenType.COLON) 5627 5628 if ( 5629 type_required 5630 and not isinstance(this, exp.DataType) 5631 and not self._match_set(self.TYPE_TOKENS, advance=False) 5632 ): 5633 self._retreat(index) 5634 return self._parse_types() 5635 5636 return self._parse_column_def(this) 5637 5638 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5639 if not self._match_text_seq("AT", "TIME", "ZONE"): 5640 return this 5641 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5642 5643 def _parse_column(self) -> t.Optional[exp.Expression]: 5644 this = self._parse_column_reference() 5645 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5646 5647 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5648 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5649 5650 return column 5651 5652 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5653 this = self._parse_field() 5654 if ( 5655 not this 5656 and self._match(TokenType.VALUES, advance=False) 5657 and self.VALUES_FOLLOWED_BY_PAREN 5658 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5659 ): 5660 this = self._parse_id_var() 5661 5662 if isinstance(this, exp.Identifier): 5663 # We bubble up comments from the Identifier to the Column 5664 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5665 5666 return this 5667 5668 def _parse_colon_as_variant_extract( 5669 self, this: t.Optional[exp.Expression] 5670 ) -> t.Optional[exp.Expression]: 5671 casts = [] 5672 json_path = [] 5673 escape = None 5674 5675 while self._match(TokenType.COLON): 5676 start_index = self._index 5677 5678 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5679 path = self._parse_column_ops( 5680 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5681 ) 5682 5683 # The cast :: operator has a lower precedence than the extraction operator :, so 5684 # we rearrange the AST appropriately to avoid casting the JSON path 5685 while isinstance(path, exp.Cast): 5686 casts.append(path.to) 5687 path = path.this 5688 5689 if casts: 5690 dcolon_offset = next( 5691 i 5692 for i, t in enumerate(self._tokens[start_index:]) 5693 if t.token_type == TokenType.DCOLON 5694 ) 5695 end_token = self._tokens[start_index + dcolon_offset - 1] 5696 else: 5697 end_token = self._prev 5698 5699 if path: 5700 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5701 # it'll roundtrip to a string literal in GET_PATH 5702 if isinstance(path, exp.Identifier) and path.quoted: 5703 escape = True 5704 5705 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5706 5707 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5708 # Databricks transforms it back to the colon/dot notation 5709 if json_path: 5710 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5711 5712 if json_path_expr: 5713 json_path_expr.set("escape", escape) 5714 5715 this = self.expression( 5716 exp.JSONExtract, 5717 this=this, 5718 expression=json_path_expr, 5719 variant_extract=True, 5720 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5721 ) 5722 5723 while casts: 5724 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5725 5726 return this 5727 5728 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5729 return self._parse_types() 5730 5731 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5732 this = self._parse_bracket(this) 5733 5734 while self._match_set(self.COLUMN_OPERATORS): 5735 op_token = self._prev.token_type 5736 op = self.COLUMN_OPERATORS.get(op_token) 5737 5738 if op_token in self.CAST_COLUMN_OPERATORS: 5739 field = self._parse_dcolon() 5740 if not field: 5741 self.raise_error("Expected type") 5742 elif op and self._curr: 5743 field = self._parse_column_reference() or self._parse_bitwise() 5744 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5745 field = self._parse_column_ops(field) 5746 else: 5747 field = self._parse_field(any_token=True, anonymous_func=True) 5748 5749 # Function calls can be qualified, e.g., x.y.FOO() 5750 # This converts the final AST to a series of Dots leading to the function call 5751 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5752 if isinstance(field, (exp.Func, exp.Window)) and this: 5753 this = this.transform( 5754 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5755 ) 5756 5757 if op: 5758 this = op(self, this, field) 5759 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5760 this = self.expression( 5761 exp.Column, 5762 comments=this.comments, 5763 this=field, 5764 table=this.this, 5765 db=this.args.get("table"), 5766 catalog=this.args.get("db"), 5767 ) 5768 elif isinstance(field, exp.Window): 5769 # Move the exp.Dot's to the window's function 5770 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5771 field.set("this", window_func) 5772 this = field 5773 else: 5774 this = self.expression(exp.Dot, this=this, expression=field) 5775 5776 if field and field.comments: 5777 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5778 5779 this = self._parse_bracket(this) 5780 5781 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5782 5783 def _parse_paren(self) -> t.Optional[exp.Expression]: 5784 if not self._match(TokenType.L_PAREN): 5785 return None 5786 5787 comments = self._prev_comments 5788 query = self._parse_select() 5789 5790 if query: 5791 expressions = [query] 5792 else: 5793 expressions = self._parse_expressions() 5794 5795 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5796 5797 if not this and self._match(TokenType.R_PAREN, advance=False): 5798 this = self.expression(exp.Tuple) 5799 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5800 this = self._parse_subquery(this=this, parse_alias=False) 5801 elif isinstance(this, exp.Subquery): 5802 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5803 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5804 this = self.expression(exp.Tuple, expressions=expressions) 5805 else: 5806 this = self.expression(exp.Paren, this=this) 5807 5808 if this: 5809 this.add_comments(comments) 5810 5811 self._match_r_paren(expression=this) 5812 5813 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 5814 return self._parse_window(this) 5815 5816 return this 5817 5818 def _parse_primary(self) -> t.Optional[exp.Expression]: 5819 if self._match_set(self.PRIMARY_PARSERS): 5820 token_type = self._prev.token_type 5821 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5822 5823 if token_type == TokenType.STRING: 5824 expressions = [primary] 5825 while self._match(TokenType.STRING): 5826 expressions.append(exp.Literal.string(self._prev.text)) 5827 5828 if len(expressions) > 1: 5829 return self.expression(exp.Concat, expressions=expressions) 5830 5831 return primary 5832 5833 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5834 return exp.Literal.number(f"0.{self._prev.text}") 5835 5836 return self._parse_paren() 5837 5838 def _parse_field( 5839 self, 5840 any_token: bool = False, 5841 tokens: t.Optional[t.Collection[TokenType]] = None, 5842 anonymous_func: bool = False, 5843 ) -> t.Optional[exp.Expression]: 5844 if anonymous_func: 5845 field = ( 5846 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5847 or self._parse_primary() 5848 ) 5849 else: 5850 field = self._parse_primary() or self._parse_function( 5851 anonymous=anonymous_func, any_token=any_token 5852 ) 5853 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5854 5855 def _parse_function( 5856 self, 5857 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5858 anonymous: bool = False, 5859 optional_parens: bool = True, 5860 any_token: bool = False, 5861 ) -> t.Optional[exp.Expression]: 5862 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5863 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5864 fn_syntax = False 5865 if ( 5866 self._match(TokenType.L_BRACE, advance=False) 5867 and self._next 5868 and self._next.text.upper() == "FN" 5869 ): 5870 self._advance(2) 5871 fn_syntax = True 5872 5873 func = self._parse_function_call( 5874 functions=functions, 5875 anonymous=anonymous, 5876 optional_parens=optional_parens, 5877 any_token=any_token, 5878 ) 5879 5880 if fn_syntax: 5881 self._match(TokenType.R_BRACE) 5882 5883 return func 5884 5885 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5886 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5887 5888 def _parse_function_call( 5889 self, 5890 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5891 anonymous: bool = False, 5892 optional_parens: bool = True, 5893 any_token: bool = False, 5894 ) -> t.Optional[exp.Expression]: 5895 if not self._curr: 5896 return None 5897 5898 comments = self._curr.comments 5899 prev = self._prev 5900 token = self._curr 5901 token_type = self._curr.token_type 5902 this = self._curr.text 5903 upper = this.upper() 5904 5905 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5906 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5907 self._advance() 5908 return 
self._parse_window(parser(self)) 5909 5910 if not self._next or self._next.token_type != TokenType.L_PAREN: 5911 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5912 self._advance() 5913 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5914 5915 return None 5916 5917 if any_token: 5918 if token_type in self.RESERVED_TOKENS: 5919 return None 5920 elif token_type not in self.FUNC_TOKENS: 5921 return None 5922 5923 self._advance(2) 5924 5925 parser = self.FUNCTION_PARSERS.get(upper) 5926 if parser and not anonymous: 5927 this = parser(self) 5928 else: 5929 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5930 5931 if subquery_predicate: 5932 expr = None 5933 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5934 expr = self._parse_select() 5935 self._match_r_paren() 5936 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5937 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5938 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5939 self._advance(-1) 5940 expr = self._parse_bitwise() 5941 5942 if expr: 5943 return self.expression(subquery_predicate, comments=comments, this=expr) 5944 5945 if functions is None: 5946 functions = self.FUNCTIONS 5947 5948 function = functions.get(upper) 5949 known_function = function and not anonymous 5950 5951 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5952 args = self._parse_function_args(alias) 5953 5954 post_func_comments = self._curr and self._curr.comments 5955 if known_function and post_func_comments: 5956 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5957 # call we'll construct it as exp.Anonymous, even if it's "known" 5958 if any( 5959 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5960 for comment in post_func_comments 5961 ): 5962 known_function = False 5963 5964 if alias and known_function: 5965 args = self._kv_to_prop_eq(args) 5966 5967 if known_function: 5968 func_builder = t.cast(t.Callable, function) 5969 5970 if "dialect" in func_builder.__code__.co_varnames: 5971 func = func_builder(args, dialect=self.dialect) 5972 else: 5973 func = func_builder(args) 5974 5975 func = self.validate_expression(func, args) 5976 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5977 func.meta["name"] = this 5978 5979 this = func 5980 else: 5981 if token_type == TokenType.IDENTIFIER: 5982 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5983 5984 this = self.expression(exp.Anonymous, this=this, expressions=args) 5985 this = this.update_positions(token) 5986 5987 if isinstance(this, exp.Expression): 5988 this.add_comments(comments) 5989 5990 self._match_r_paren(this) 5991 return self._parse_window(this) 5992 5993 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5994 return expression 5995 5996 def _kv_to_prop_eq( 5997 self, expressions: t.List[exp.Expression], parse_map: bool = False 5998 ) -> t.List[exp.Expression]: 5999 transformed = [] 6000 6001 for index, e in enumerate(expressions): 6002 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6003 if isinstance(e, exp.Alias): 6004 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 6005 6006 if not isinstance(e, exp.PropertyEQ): 6007 e = self.expression( 6008 exp.PropertyEQ, 6009 this=e.this if parse_map else exp.to_identifier(e.this.name), 6010 expression=e.expression, 6011 ) 6012 6013 if isinstance(e.this, exp.Column): 6014 e.this.replace(e.this.this) 
6015 else: 6016 e = self._to_prop_eq(e, index) 6017 6018 transformed.append(e) 6019 6020 return transformed 6021 6022 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6023 return self._parse_statement() 6024 6025 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6026 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6027 6028 def _parse_user_defined_function( 6029 self, kind: t.Optional[TokenType] = None 6030 ) -> t.Optional[exp.Expression]: 6031 this = self._parse_table_parts(schema=True) 6032 6033 if not self._match(TokenType.L_PAREN): 6034 return this 6035 6036 expressions = self._parse_csv(self._parse_function_parameter) 6037 self._match_r_paren() 6038 return self.expression( 6039 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6040 ) 6041 6042 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6043 literal = self._parse_primary() 6044 if literal: 6045 return self.expression(exp.Introducer, this=token.text, expression=literal) 6046 6047 return self._identifier_expression(token) 6048 6049 def _parse_session_parameter(self) -> exp.SessionParameter: 6050 kind = None 6051 this = self._parse_id_var() or self._parse_primary() 6052 6053 if this and self._match(TokenType.DOT): 6054 kind = this.name 6055 this = self._parse_var() or self._parse_primary() 6056 6057 return self.expression(exp.SessionParameter, this=this, kind=kind) 6058 6059 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6060 return self._parse_id_var() 6061 6062 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6063 index = self._index 6064 6065 if self._match(TokenType.L_PAREN): 6066 expressions = t.cast( 6067 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6068 ) 6069 6070 if not self._match(TokenType.R_PAREN): 6071 self._retreat(index) 6072 else: 6073 expressions = [self._parse_lambda_arg()] 6074 6075 if self._match_set(self.LAMBDAS): 6076 return self.LAMBDAS[self._prev.token_type](self, expressions) 6077 6078 self._retreat(index) 6079 6080 this: t.Optional[exp.Expression] 6081 6082 if self._match(TokenType.DISTINCT): 6083 this = self.expression( 6084 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6085 ) 6086 else: 6087 this = self._parse_select_or_expression(alias=alias) 6088 6089 return self._parse_limit( 6090 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6091 ) 6092 6093 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6094 index = self._index 6095 if not self._match(TokenType.L_PAREN): 6096 return this 6097 6098 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6099 # expr can be of both types 6100 if self._match_set(self.SELECT_START_TOKENS): 6101 self._retreat(index) 6102 return this 6103 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6104 self._match_r_paren() 6105 return self.expression(exp.Schema, this=this, expressions=args) 6106 6107 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6108 return self._parse_column_def(self._parse_field(any_token=True)) 6109 6110 def _parse_column_def( 6111 self, this: t.Optional[exp.Expression], computed_column: bool = True 6112 ) -> t.Optional[exp.Expression]: 6113 # column defs are not really columns, they're identifiers 6114 if isinstance(this, exp.Column): 6115 this = this.this 6116 6117 if not computed_column: 6118 self._match(TokenType.ALIAS) 6119 6120 kind = self._parse_types(schema=True) 6121 6122 if self._match_text_seq("FOR", "ORDINALITY"): 6123 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6124 6125 constraints: t.List[exp.Expression] = [] 6126 6127 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6128 ("ALIAS", "MATERIALIZED") 6129 ): 6130 persisted = self._prev.text.upper() == "MATERIALIZED" 6131 constraint_kind = exp.ComputedColumnConstraint( 6132 this=self._parse_assignment(), 6133 persisted=persisted or self._match_text_seq("PERSISTED"), 6134 data_type=exp.Var(this="AUTO") 6135 if self._match_text_seq("AUTO") 6136 else self._parse_types(), 6137 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6138 ) 6139 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6140 elif ( 6141 kind 6142 and self._match(TokenType.ALIAS, advance=False) 6143 and ( 6144 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6145 or (self._next and self._next.token_type == TokenType.L_PAREN) 6146 ) 6147 ): 6148 self._advance() 6149 constraints.append( 6150 self.expression( 6151 exp.ColumnConstraint, 6152 kind=exp.ComputedColumnConstraint( 6153 this=self._parse_disjunction(), 6154 persisted=self._match_texts(("STORED", "VIRTUAL")) 6155 and self._prev.text.upper() == "STORED", 6156 ), 6157 ) 6158 ) 6159 6160 while True: 6161 constraint = self._parse_column_constraint() 6162 if not constraint: 6163 break 6164 constraints.append(constraint) 6165 6166 if not kind and not constraints: 6167 return this 6168 6169 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6170 6171 def _parse_auto_increment( 6172 self, 6173 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6174 start = None 6175 increment = None 6176 order = None 6177 6178 if self._match(TokenType.L_PAREN, advance=False): 6179 args = self._parse_wrapped_csv(self._parse_bitwise) 6180 start = seq_get(args, 0) 6181 increment = seq_get(args, 1) 6182 elif self._match_text_seq("START"): 6183 start = self._parse_bitwise() 6184 self._match_text_seq("INCREMENT") 6185 increment = self._parse_bitwise() 6186 if self._match_text_seq("ORDER"): 6187 order = True 6188 elif self._match_text_seq("NOORDER"): 6189 order = False 6190 6191 if start and increment: 6192 return exp.GeneratedAsIdentityColumnConstraint( 6193 start=start, increment=increment, this=False, order=order 6194 ) 6195 6196 return exp.AutoIncrementColumnConstraint() 6197 6198 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6199 if not self._match_text_seq("REFRESH"): 6200 self._retreat(self._index - 1) 6201 return None 6202 return self.expression(exp.AutoRefreshProperty, 
this=self._parse_var(upper=True)) 6203 6204 def _parse_compress(self) -> exp.CompressColumnConstraint: 6205 if self._match(TokenType.L_PAREN, advance=False): 6206 return self.expression( 6207 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6208 ) 6209 6210 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6211 6212 def _parse_generated_as_identity( 6213 self, 6214 ) -> ( 6215 exp.GeneratedAsIdentityColumnConstraint 6216 | exp.ComputedColumnConstraint 6217 | exp.GeneratedAsRowColumnConstraint 6218 ): 6219 if self._match_text_seq("BY", "DEFAULT"): 6220 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6221 this = self.expression( 6222 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6223 ) 6224 else: 6225 self._match_text_seq("ALWAYS") 6226 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6227 6228 self._match(TokenType.ALIAS) 6229 6230 if self._match_text_seq("ROW"): 6231 start = self._match_text_seq("START") 6232 if not start: 6233 self._match(TokenType.END) 6234 hidden = self._match_text_seq("HIDDEN") 6235 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6236 6237 identity = self._match_text_seq("IDENTITY") 6238 6239 if self._match(TokenType.L_PAREN): 6240 if self._match(TokenType.START_WITH): 6241 this.set("start", self._parse_bitwise()) 6242 if self._match_text_seq("INCREMENT", "BY"): 6243 this.set("increment", self._parse_bitwise()) 6244 if self._match_text_seq("MINVALUE"): 6245 this.set("minvalue", self._parse_bitwise()) 6246 if self._match_text_seq("MAXVALUE"): 6247 this.set("maxvalue", self._parse_bitwise()) 6248 6249 if self._match_text_seq("CYCLE"): 6250 this.set("cycle", True) 6251 elif self._match_text_seq("NO", "CYCLE"): 6252 this.set("cycle", False) 6253 6254 if not identity: 6255 this.set("expression", self._parse_range()) 6256 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6257 args = self._parse_csv(self._parse_bitwise) 6258 this.set("start", seq_get(args, 0)) 6259 this.set("increment", seq_get(args, 1)) 6260 6261 self._match_r_paren() 6262 6263 return this 6264 6265 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6266 self._match_text_seq("LENGTH") 6267 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6268 6269 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6270 if self._match_text_seq("NULL"): 6271 return self.expression(exp.NotNullColumnConstraint) 6272 if self._match_text_seq("CASESPECIFIC"): 6273 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6274 if self._match_text_seq("FOR", "REPLICATION"): 6275 return self.expression(exp.NotForReplicationColumnConstraint) 6276 6277 # Unconsume the `NOT` token 6278 self._retreat(self._index - 1) 6279 return None 6280 6281 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6282 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6283 6284 procedure_option_follows = ( 6285 self._match(TokenType.WITH, advance=False) 6286 and self._next 6287 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6288 ) 6289 6290 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6291 return self.expression( 6292 exp.ColumnConstraint, 6293 this=this, 6294 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6295 ) 6296 6297 return this 6298 6299 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6300 if not 
self._match(TokenType.CONSTRAINT): 6301 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6302 6303 return self.expression( 6304 exp.Constraint, 6305 this=self._parse_id_var(), 6306 expressions=self._parse_unnamed_constraints(), 6307 ) 6308 6309 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6310 constraints = [] 6311 while True: 6312 constraint = self._parse_unnamed_constraint() or self._parse_function() 6313 if not constraint: 6314 break 6315 constraints.append(constraint) 6316 6317 return constraints 6318 6319 def _parse_unnamed_constraint( 6320 self, constraints: t.Optional[t.Collection[str]] = None 6321 ) -> t.Optional[exp.Expression]: 6322 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6323 constraints or self.CONSTRAINT_PARSERS 6324 ): 6325 return None 6326 6327 constraint = self._prev.text.upper() 6328 if constraint not in self.CONSTRAINT_PARSERS: 6329 self.raise_error(f"No parser found for schema constraint {constraint}.") 6330 6331 return self.CONSTRAINT_PARSERS[constraint](self) 6332 6333 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6334 return self._parse_id_var(any_token=False) 6335 6336 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6337 self._match_texts(("KEY", "INDEX")) 6338 return self.expression( 6339 exp.UniqueColumnConstraint, 6340 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6341 this=self._parse_schema(self._parse_unique_key()), 6342 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6343 on_conflict=self._parse_on_conflict(), 6344 options=self._parse_key_constraint_options(), 6345 ) 6346 6347 def _parse_key_constraint_options(self) -> t.List[str]: 6348 options = [] 6349 while True: 6350 if not self._curr: 6351 break 6352 6353 if self._match(TokenType.ON): 6354 action = None 6355 on = self._advance_any() and self._prev.text 6356 6357 if self._match_text_seq("NO", "ACTION"): 6358 action = "NO ACTION" 6359 elif self._match_text_seq("CASCADE"): 6360 action = "CASCADE" 6361 elif self._match_text_seq("RESTRICT"): 6362 action = "RESTRICT" 6363 elif self._match_pair(TokenType.SET, TokenType.NULL): 6364 action = "SET NULL" 6365 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6366 action = "SET DEFAULT" 6367 else: 6368 self.raise_error("Invalid key constraint") 6369 6370 options.append(f"ON {on} {action}") 6371 else: 6372 var = self._parse_var_from_options( 6373 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6374 ) 6375 if not var: 6376 break 6377 options.append(var.name) 6378 6379 return options 6380 6381 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6382 if match and not self._match(TokenType.REFERENCES): 6383 return None 6384 6385 expressions = None 6386 this = self._parse_table(schema=True) 6387 options = self._parse_key_constraint_options() 6388 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6389 6390 def _parse_foreign_key(self) -> exp.ForeignKey: 6391 expressions = ( 6392 self._parse_wrapped_id_vars() 6393 if not self._match(TokenType.REFERENCES, advance=False) 6394 else None 6395 ) 6396 reference = self._parse_references() 6397 on_options = {} 6398 6399 while self._match(TokenType.ON): 6400 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6401 self.raise_error("Expected DELETE or UPDATE") 6402 6403 kind = self._prev.text.lower() 6404 6405 if self._match_text_seq("NO", "ACTION"): 6406 action = "NO ACTION" 6407 elif 
self._match(TokenType.SET): 6408 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6409 action = "SET " + self._prev.text.upper() 6410 else: 6411 self._advance() 6412 action = self._prev.text.upper() 6413 6414 on_options[kind] = action 6415 6416 return self.expression( 6417 exp.ForeignKey, 6418 expressions=expressions, 6419 reference=reference, 6420 options=self._parse_key_constraint_options(), 6421 **on_options, # type: ignore 6422 ) 6423 6424 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6425 return self._parse_ordered() or self._parse_field() 6426 6427 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6428 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6429 self._retreat(self._index - 1) 6430 return None 6431 6432 id_vars = self._parse_wrapped_id_vars() 6433 return self.expression( 6434 exp.PeriodForSystemTimeConstraint, 6435 this=seq_get(id_vars, 0), 6436 expression=seq_get(id_vars, 1), 6437 ) 6438 6439 def _parse_primary_key( 6440 self, wrapped_optional: bool = False, in_props: bool = False 6441 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6442 desc = ( 6443 self._match_set((TokenType.ASC, TokenType.DESC)) 6444 and self._prev.token_type == TokenType.DESC 6445 ) 6446 6447 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6448 return self.expression( 6449 exp.PrimaryKeyColumnConstraint, 6450 desc=desc, 6451 options=self._parse_key_constraint_options(), 6452 ) 6453 6454 expressions = self._parse_wrapped_csv( 6455 self._parse_primary_key_part, optional=wrapped_optional 6456 ) 6457 6458 return self.expression( 6459 exp.PrimaryKey, 6460 expressions=expressions, 6461 include=self._parse_index_params(), 6462 options=self._parse_key_constraint_options(), 6463 ) 6464 6465 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6466 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6467 6468 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6469 """ 6470 Parses a datetime column in ODBC format. We parse the column into the corresponding 6471 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6472 same as we did for `DATE('yyyy-mm-dd')`. 
6473 6474 Reference: 6475 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6476 """ 6477 self._match(TokenType.VAR) 6478 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6479 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6480 if not self._match(TokenType.R_BRACE): 6481 self.raise_error("Expected }") 6482 return expression 6483 6484 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6485 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6486 return this 6487 6488 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6489 map_token = seq_get(self._tokens, self._index - 2) 6490 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6491 else: 6492 parse_map = False 6493 6494 bracket_kind = self._prev.token_type 6495 if ( 6496 bracket_kind == TokenType.L_BRACE 6497 and self._curr 6498 and self._curr.token_type == TokenType.VAR 6499 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6500 ): 6501 return self._parse_odbc_datetime_literal() 6502 6503 expressions = self._parse_csv( 6504 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6505 ) 6506 6507 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6508 self.raise_error("Expected ]") 6509 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6510 self.raise_error("Expected }") 6511 6512 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6513 if bracket_kind == TokenType.L_BRACE: 6514 this = self.expression( 6515 exp.Struct, 6516 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6517 ) 6518 elif not this: 6519 this = build_array_constructor( 6520 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6521 ) 6522 else: 6523 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6524 if constructor_type: 6525 return build_array_constructor( 6526 constructor_type, 6527 args=expressions, 6528 bracket_kind=bracket_kind, 6529 dialect=self.dialect, 6530 ) 6531 6532 expressions = apply_index_offset( 6533 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6534 ) 6535 this = self.expression( 6536 exp.Bracket, 6537 this=this, 6538 expressions=expressions, 6539 comments=this.pop_comments(), 6540 ) 6541 6542 self._add_comments(this) 6543 return self._parse_bracket(this) 6544 6545 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6546 if self._match(TokenType.COLON): 6547 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6548 return this 6549 6550 def _parse_case(self) -> t.Optional[exp.Expression]: 6551 if self._match(TokenType.DOT, advance=False): 6552 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6553 self._retreat(self._index - 1) 6554 return None 6555 6556 ifs = [] 6557 default = None 6558 6559 comments = self._prev_comments 6560 expression = self._parse_assignment() 6561 6562 while self._match(TokenType.WHEN): 6563 this = self._parse_assignment() 6564 self._match(TokenType.THEN) 6565 then = self._parse_assignment() 6566 ifs.append(self.expression(exp.If, this=this, true=then)) 6567 6568 if self._match(TokenType.ELSE): 6569 default = self._parse_assignment() 6570 6571 if not self._match(TokenType.END): 6572 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6573 default 
    def _parse_case(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.DOT, advance=False):
            # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake
            self._retreat(self._index - 1)
            return None

        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, dialect=self.dialect, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.build_cast(
            strict=strict,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )

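    # Illustrative usage (added commentary, not part of the original source): a plain
    # CAST becomes an exp.Cast, while a FORMAT clause on a temporal target is rewritten
    # into exp.StrToDate / exp.StrToTime by the branch above. A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> isinstance(sqlglot.parse_one("SELECT CAST(a AS INT)").find(exp.Cast), exp.Cast)
    #   True
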
    def _parse_string_agg(self) -> exp.GroupConcat:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

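    # Illustrative usage (added commentary, not part of the original source): parsing
    # the order clause into `this` rather than an exp.WithinGroup is what keeps
    # STRING_AGG/LISTAGG/GROUP_CONCAT mutually transpilable. A minimal sketch; the
    # exact output string may vary by sqlglot version:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile(
    #   ...     "SELECT STRING_AGG(x, ',' ORDER BY y) FROM t", read="postgres", write="mysql"
    #   ... )  # expected: a GROUP_CONCAT(... ORDER BY ... SEPARATOR ',') call
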
    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.build_cast(strict=strict, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

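    # Illustrative usage (added commentary, not part of the original source): with
    # three or more arguments, Oracle-style DECODE is parsed as exp.DecodeCase so
    # writers can render it as a searched CASE where needed. A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> e = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other')", read="oracle")
    #   >>> e.find(exp.DecodeCase) is not None
    #   True
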
    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

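    # Illustrative usage (added commentary, not part of the original source): the JSON
    # builders above cover JSON_OBJECT's KEY/VALUE pairs plus its ON NULL, UNIQUE KEYS
    # and RETURNING clauses. A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> e = sqlglot.parse_one("SELECT JSON_OBJECT('k' VALUE v ABSENT ON NULL)", read="oracle")
    #   >>> e.find(exp.JSONObject).args.get("null_handling")
    #   'ABSENT ON NULL'
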
    def _parse_match_against(self) -> exp.MatchAgainst:
        if self._match_text_seq("TABLE"):
            # parse SingleStore MATCH(TABLE ...) syntax
            # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/
            expressions = []
            table = self._parse_table()
            if table:
                expressions = [table]
        else:
            expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # (despite being undocumented, the reverse order also works)
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        start, length = None, None

        while self._curr:
            if self._match(TokenType.FROM):
                start = self._parse_bitwise()
            elif self._match(TokenType.FOR):
                if not start:
                    start = exp.Literal.number(1)
                length = self._parse_bitwise()
            else:
                break

        if start:
            args.append(start)
        if length:
            args.append(length)

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

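    # Illustrative usage (added commentary, not part of the original source): the
    # FROM/FOR keywords are folded into ordinary positional arguments, so both
    # spellings should parse to the same exp.Substring. A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> a = sqlglot.parse_one("SELECT SUBSTRING(s FROM 2 FOR 3)", read="postgres")
    #   >>> b = sqlglot.parse_one("SELECT SUBSTRING(s, 2, 3)", read="postgres")
    #   >>> a == b
    #   True
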
    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

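    # Illustrative usage (added commentary, not part of the original source): TRIM's
    # position/pattern variants normalize into a single exp.Trim node, e.g.:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> trim = sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM s)").find(exp.Trim)
    #   >>> trim.args.get("position")
    #   'LEADING'
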
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()

            end = self._parse_window_spec() if self._match(TokenType.AND) else {}
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end.get("value"),
                end_side=end.get("side"),
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

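    # Illustrative usage (added commentary, not part of the original source): per the
    # notes above, IGNORE NULLS is accepted either inside or outside the call and
    # normalizes to the same shape, so both of these should parse into an exp.Window
    # whose `this` is wrapped in exp.IgnoreNulls:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT LAST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
    #   >>> sqlglot.parse_one("SELECT LAST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
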
    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_type()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.NULL, TokenType.UNKNOWN)):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_alias(self._parse_assignment(), explicit=True)
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return (
            self._parse_set_operations(
                self._parse_alias(self._parse_assignment(), explicit=True)
                if alias
                else self._parse_assignment()
            )
            or self._parse_select()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR) or self._match(TokenType.NOT):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_column_def_with_exists(self):
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        if self._prev.text.upper() != "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

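    # Illustrative usage (added commentary, not part of the original source): ALTER
    # TABLE actions parse into typed nodes under exp.Alter, e.g. an exp.AlterColumn
    # for SET NOT NULL. A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> e = sqlglot.parse_one("ALTER TABLE t ALTER COLUMN c SET NOT NULL")
    #   >>> isinstance(e, exp.Alter) and isinstance(e.args["actions"][0], exp.AlterColumn)
    #   True
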
    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set

    def _parse_alter_session(self) -> exp.AlterSession:
        """Parse ALTER SESSION SET/UNSET statements."""
        if self._match(TokenType.SET):
            expressions = self._parse_csv(lambda: self._parse_set_item_assignment())
            return self.expression(exp.AlterSession, expressions=expressions, unset=False)

        self._match_text_seq("UNSET")
        expressions = self._parse_csv(
            lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True))
        )
        return self.expression(exp.AlterSession, expressions=expressions, unset=True)

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")

        if alter_token.token_type == TokenType.SESSION:
            this = None
            check = None
            cluster = None
        else:
            this = self._parse_table(schema=True)
            check = self._match_text_seq("WITH", "CHECK")
            cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                    check=check,
                )

        return self._parse_as_command(start)

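    # Illustrative usage (added commentary, not part of the original source): when
    # leftover tokens remain after the action parsers run, the whole statement falls
    # back to an opaque exp.Command (with a warning) instead of failing. A minimal
    # sketch of the happy path:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> isinstance(sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT"), exp.Alter)
    #   True
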
    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

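    # Illustrative usage (added commentary, not part of the original source): ANALYZE
    # is parsed into exp.Analyze, with dialect-specific kinds and options captured as
    # plain strings. A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> e = sqlglot.parse_one("ANALYZE TABLE t COMPUTE STATISTICS", read="spark")
    #   >>> isinstance(e, exp.Analyze)
    #   True
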
    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

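    # Illustrative usage (added commentary, not part of the original source): the WHEN
    # branches parsed below end up as an exp.Whens attached to the exp.Merge. A minimal
    # sketch:
    #
    #   >>> import sqlglot
    #   >>> e = sqlglot.parse_one(
    #   ...     "MERGE INTO t USING s ON t.id = s.id "
    #   ...     "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #   ...     "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #   ... )
    #   >>> len(e.args["whens"].expressions)
    #   2
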
    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

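    # Illustrative usage (added commentary, not part of the original source): dialects
    # whose tokenizers emit HEREDOC_STRING tokens (e.g. Postgres dollar-quoting) take
    # the fast path above; the manual scan handles the tagged form. A minimal sketch,
    # which should yield an exp.Heredoc node:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT $tag$hello$tag$", read="postgres").find(exp.Heredoc)
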
True 8223 8224 return None 8225 8226 def _match_set(self, types, advance=True): 8227 if not self._curr: 8228 return None 8229 8230 if self._curr.token_type in types: 8231 if advance: 8232 self._advance() 8233 return True 8234 8235 return None 8236 8237 def _match_pair(self, token_type_a, token_type_b, advance=True): 8238 if not self._curr or not self._next: 8239 return None 8240 8241 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8242 if advance: 8243 self._advance(2) 8244 return True 8245 8246 return None 8247 8248 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8249 if not self._match(TokenType.L_PAREN, expression=expression): 8250 self.raise_error("Expecting (") 8251 8252 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8253 if not self._match(TokenType.R_PAREN, expression=expression): 8254 self.raise_error("Expecting )") 8255 8256 def _match_texts(self, texts, advance=True): 8257 if ( 8258 self._curr 8259 and self._curr.token_type != TokenType.STRING 8260 and self._curr.text.upper() in texts 8261 ): 8262 if advance: 8263 self._advance() 8264 return True 8265 return None 8266 8267 def _match_text_seq(self, *texts, advance=True): 8268 index = self._index 8269 for text in texts: 8270 if ( 8271 self._curr 8272 and self._curr.token_type != TokenType.STRING 8273 and self._curr.text.upper() == text 8274 ): 8275 self._advance() 8276 else: 8277 self._retreat(index) 8278 return None 8279 8280 if not advance: 8281 self._retreat(index) 8282 8283 return True 8284 8285 def _replace_lambda( 8286 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8287 ) -> t.Optional[exp.Expression]: 8288 if not node: 8289 return node 8290 8291 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8292 8293 for column in node.find_all(exp.Column): 8294 typ = lambda_types.get(column.parts[0].name) 8295 if typ is not None: 8296 dot_or_id = column.to_dot() if column.table else column.this 8297 8298 if typ: 8299 dot_or_id = self.expression( 8300 exp.Cast, 8301 this=dot_or_id, 8302 to=typ, 8303 ) 8304 8305 parent = column.parent 8306 8307 while isinstance(parent, exp.Dot): 8308 if not isinstance(parent.parent, exp.Dot): 8309 parent.replace(dot_or_id) 8310 break 8311 parent = parent.parent 8312 else: 8313 if column is node: 8314 node = dot_or_id 8315 else: 8316 column.replace(dot_or_id) 8317 return node 8318 8319 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8320 start = self._prev 8321 8322 # Not to be confused with TRUNCATE(number, decimals) function call 8323 if self._match(TokenType.L_PAREN): 8324 self._retreat(self._index - 2) 8325 return self._parse_function() 8326 8327 # Clickhouse supports TRUNCATE DATABASE as well 8328 is_database = self._match(TokenType.DATABASE) 8329 8330 self._match(TokenType.TABLE) 8331 8332 exists = self._parse_exists(not_=False) 8333 8334 expressions = self._parse_csv( 8335 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8336 ) 8337 8338 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8339 8340 if self._match_text_seq("RESTART", "IDENTITY"): 8341 identity = "RESTART" 8342 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8343 identity = "CONTINUE" 8344 else: 8345 identity = None 8346 8347 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8348 option = self._prev.text 8349 else: 8350 option = None 8351 8352 partition = self._parse_partition() 
8353 8354 # Fallback case 8355 if self._curr: 8356 return self._parse_as_command(start) 8357 8358 return self.expression( 8359 exp.TruncateTable, 8360 expressions=expressions, 8361 is_database=is_database, 8362 exists=exists, 8363 cluster=cluster, 8364 identity=identity, 8365 option=option, 8366 partition=partition, 8367 ) 8368 8369 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8370 this = self._parse_ordered(self._parse_opclass) 8371 8372 if not self._match(TokenType.WITH): 8373 return this 8374 8375 op = self._parse_var(any_token=True) 8376 8377 return self.expression(exp.WithOperator, this=this, op=op) 8378 8379 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8380 self._match(TokenType.EQ) 8381 self._match(TokenType.L_PAREN) 8382 8383 opts: t.List[t.Optional[exp.Expression]] = [] 8384 option: exp.Expression | None 8385 while self._curr and not self._match(TokenType.R_PAREN): 8386 if self._match_text_seq("FORMAT_NAME", "="): 8387 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8388 option = self._parse_format_name() 8389 else: 8390 option = self._parse_property() 8391 8392 if option is None: 8393 self.raise_error("Unable to parse option") 8394 break 8395 8396 opts.append(option) 8397 8398 return opts 8399 8400 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8401 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8402 8403 options = [] 8404 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8405 option = self._parse_var(any_token=True) 8406 prev = self._prev.text.upper() 8407 8408 # Different dialects might separate options and values by white space, "=" and "AS" 8409 self._match(TokenType.EQ) 8410 self._match(TokenType.ALIAS) 8411 8412 param = self.expression(exp.CopyParameter, this=option) 8413 8414 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8415 TokenType.L_PAREN, advance=False 8416 ): 8417 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8418 param.set("expressions", self._parse_wrapped_options()) 8419 elif prev == "FILE_FORMAT": 8420 # T-SQL's external file format case 8421 param.set("expression", self._parse_field()) 8422 else: 8423 param.set("expression", self._parse_unquoted_field()) 8424 8425 options.append(param) 8426 self._match(sep) 8427 8428 return options 8429 8430 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8431 expr = self.expression(exp.Credentials) 8432 8433 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8434 expr.set("storage", self._parse_field()) 8435 if self._match_text_seq("CREDENTIALS"): 8436 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8437 creds = ( 8438 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8439 ) 8440 expr.set("credentials", creds) 8441 if self._match_text_seq("ENCRYPTION"): 8442 expr.set("encryption", self._parse_wrapped_options()) 8443 if self._match_text_seq("IAM_ROLE"): 8444 expr.set("iam_role", self._parse_field()) 8445 if self._match_text_seq("REGION"): 8446 expr.set("region", self._parse_field()) 8447 8448 return expr 8449 8450 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8451 return self._parse_field() 8452 8453 def _parse_copy(self) -> exp.Copy | exp.Command: 8454 start = self._prev 8455 8456 self._match(TokenType.INTO) 8457 8458 this = ( 8459 self._parse_select(nested=True, parse_subquery_alias=False) 8460 if self._match(TokenType.L_PAREN, advance=False) 8461 else self._parse_table(schema=True) 
8462 ) 8463 8464 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8465 8466 files = self._parse_csv(self._parse_file_location) 8467 if self._match(TokenType.EQ, advance=False): 8468 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8469 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8470 # list via `_parse_wrapped(..)` below. 8471 self._advance(-1) 8472 files = [] 8473 8474 credentials = self._parse_credentials() 8475 8476 self._match_text_seq("WITH") 8477 8478 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8479 8480 # Fallback case 8481 if self._curr: 8482 return self._parse_as_command(start) 8483 8484 return self.expression( 8485 exp.Copy, 8486 this=this, 8487 kind=kind, 8488 credentials=credentials, 8489 files=files, 8490 params=params, 8491 ) 8492 8493 def _parse_normalize(self) -> exp.Normalize: 8494 return self.expression( 8495 exp.Normalize, 8496 this=self._parse_bitwise(), 8497 form=self._match(TokenType.COMMA) and self._parse_var(), 8498 ) 8499 8500 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8501 args = self._parse_csv(lambda: self._parse_lambda()) 8502 8503 this = seq_get(args, 0) 8504 decimals = seq_get(args, 1) 8505 8506 return expr_type( 8507 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8508 ) 8509 8510 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8511 star_token = self._prev 8512 8513 if self._match_text_seq("COLUMNS", "(", advance=False): 8514 this = self._parse_function() 8515 if isinstance(this, exp.Columns): 8516 this.set("unpack", True) 8517 return this 8518 8519 return self.expression( 8520 exp.Star, 8521 **{ # type: ignore 8522 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8523 "replace": self._parse_star_op("REPLACE"), 8524 "rename": self._parse_star_op("RENAME"), 8525 }, 8526 ).update_positions(star_token) 8527 8528 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8529 privilege_parts = [] 8530 8531 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8532 # (end of privilege list) or L_PAREN (start of column list) are met 8533 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8534 privilege_parts.append(self._curr.text.upper()) 8535 self._advance() 8536 8537 this = exp.var(" ".join(privilege_parts)) 8538 expressions = ( 8539 self._parse_wrapped_csv(self._parse_column) 8540 if self._match(TokenType.L_PAREN, advance=False) 8541 else None 8542 ) 8543 8544 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8545 8546 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8547 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8548 principal = self._parse_id_var() 8549 8550 if not principal: 8551 return None 8552 8553 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8554 8555 def _parse_grant_revoke_common( 8556 self, 8557 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8558 privileges = self._parse_csv(self._parse_grant_privilege) 8559 8560 self._match(TokenType.ON) 8561 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8562 8563 # Attempt to parse the securable e.g. 
MySQL allows names 8564 # such as "foo.*", "*.*" which are not easily parseable yet 8565 securable = self._try_parse(self._parse_table_parts) 8566 8567 return privileges, kind, securable 8568 8569 def _parse_grant(self) -> exp.Grant | exp.Command: 8570 start = self._prev 8571 8572 privileges, kind, securable = self._parse_grant_revoke_common() 8573 8574 if not securable or not self._match_text_seq("TO"): 8575 return self._parse_as_command(start) 8576 8577 principals = self._parse_csv(self._parse_grant_principal) 8578 8579 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8580 8581 if self._curr: 8582 return self._parse_as_command(start) 8583 8584 return self.expression( 8585 exp.Grant, 8586 privileges=privileges, 8587 kind=kind, 8588 securable=securable, 8589 principals=principals, 8590 grant_option=grant_option, 8591 ) 8592 8593 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8594 start = self._prev 8595 8596 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8597 8598 privileges, kind, securable = self._parse_grant_revoke_common() 8599 8600 if not securable or not self._match_text_seq("FROM"): 8601 return self._parse_as_command(start) 8602 8603 principals = self._parse_csv(self._parse_grant_principal) 8604 8605 cascade = None 8606 if self._match_texts(("CASCADE", "RESTRICT")): 8607 cascade = self._prev.text.upper() 8608 8609 if self._curr: 8610 return self._parse_as_command(start) 8611 8612 return self.expression( 8613 exp.Revoke, 8614 privileges=privileges, 8615 kind=kind, 8616 securable=securable, 8617 principals=principals, 8618 grant_option=grant_option, 8619 cascade=cascade, 8620 ) 8621 8622 def _parse_overlay(self) -> exp.Overlay: 8623 return self.expression( 8624 exp.Overlay, 8625 **{ # type: ignore 8626 "this": self._parse_bitwise(), 8627 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8628 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8629 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8630 }, 8631 ) 8632 8633 def _parse_format_name(self) -> exp.Property: 8634 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8635 # for FILE_FORMAT = <format_name> 8636 return self.expression( 8637 exp.Property, 8638 this=exp.var("FORMAT_NAME"), 8639 value=self._parse_string() or self._parse_table_parts(), 8640 ) 8641 8642 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8643 args: t.List[exp.Expression] = [] 8644 8645 if self._match(TokenType.DISTINCT): 8646 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8647 self._match(TokenType.COMMA) 8648 8649 args.extend(self._parse_csv(self._parse_assignment)) 8650 8651 return self.expression( 8652 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8653 ) 8654 8655 def _identifier_expression( 8656 self, token: t.Optional[Token] = None, **kwargs: t.Any 8657 ) -> exp.Identifier: 8658 token = token or self._prev 8659 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8660 expression.update_positions(token) 8661 return expression 8662 8663 def _build_pipe_cte( 8664 self, 8665 query: exp.Query, 8666 expressions: t.List[exp.Expression], 8667 alias_cte: t.Optional[exp.TableAlias] = None, 8668 ) -> exp.Select: 8669 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8670 if alias_cte: 8671 new_cte = alias_cte 8672 else: 8673 self._pipe_cte_counter += 1 8674 new_cte = f"__tmp{self._pipe_cte_counter}" 8675 8676 with_ = 
query.args.get("with") 8677 ctes = with_.pop() if with_ else None 8678 8679 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8680 if ctes: 8681 new_select.set("with", ctes) 8682 8683 return new_select.with_(new_cte, as_=query, copy=False) 8684 8685 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8686 select = self._parse_select(consume_pipe=False) 8687 if not select: 8688 return query 8689 8690 return self._build_pipe_cte( 8691 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8692 ) 8693 8694 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8695 limit = self._parse_limit() 8696 offset = self._parse_offset() 8697 if limit: 8698 curr_limit = query.args.get("limit", limit) 8699 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8700 query.limit(limit, copy=False) 8701 if offset: 8702 curr_offset = query.args.get("offset") 8703 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8704 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8705 8706 return query 8707 8708 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8709 this = self._parse_assignment() 8710 if self._match_text_seq("GROUP", "AND", advance=False): 8711 return this 8712 8713 this = self._parse_alias(this) 8714 8715 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8716 return self._parse_ordered(lambda: this) 8717 8718 return this 8719 8720 def _parse_pipe_syntax_aggregate_group_order_by( 8721 self, query: exp.Select, group_by_exists: bool = True 8722 ) -> exp.Select: 8723 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8724 aggregates_or_groups, orders = [], [] 8725 for element in expr: 8726 if isinstance(element, exp.Ordered): 8727 this = element.this 8728 if isinstance(this, exp.Alias): 8729 element.set("this", this.args["alias"]) 8730 orders.append(element) 8731 else: 8732 this = element 8733 aggregates_or_groups.append(this) 8734 8735 if group_by_exists: 8736 query.select(*aggregates_or_groups, copy=False).group_by( 8737 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8738 copy=False, 8739 ) 8740 else: 8741 query.select(*aggregates_or_groups, append=False, copy=False) 8742 8743 if orders: 8744 return query.order_by(*orders, append=False, copy=False) 8745 8746 return query 8747 8748 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8749 self._match_text_seq("AGGREGATE") 8750 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8751 8752 if self._match(TokenType.GROUP_BY) or ( 8753 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8754 ): 8755 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8756 8757 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8758 8759 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8760 first_setop = self.parse_set_operation(this=query) 8761 if not first_setop: 8762 return None 8763 8764 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8765 expr = self._parse_paren() 8766 return expr.assert_is(exp.Subquery).unnest() if expr else None 8767 8768 first_setop.this.pop() 8769 8770 setops = [ 8771 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8772 *self._parse_csv(_parse_and_unwrap_query), 8773 ] 8774 8775 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8776 
with_ = query.args.get("with") 8777 ctes = with_.pop() if with_ else None 8778 8779 if isinstance(first_setop, exp.Union): 8780 query = query.union(*setops, copy=False, **first_setop.args) 8781 elif isinstance(first_setop, exp.Except): 8782 query = query.except_(*setops, copy=False, **first_setop.args) 8783 else: 8784 query = query.intersect(*setops, copy=False, **first_setop.args) 8785 8786 query.set("with", ctes) 8787 8788 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8789 8790 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8791 join = self._parse_join() 8792 if not join: 8793 return None 8794 8795 if isinstance(query, exp.Select): 8796 return query.join(join, copy=False) 8797 8798 return query 8799 8800 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8801 pivots = self._parse_pivots() 8802 if not pivots: 8803 return query 8804 8805 from_ = query.args.get("from") 8806 if from_: 8807 from_.this.set("pivots", pivots) 8808 8809 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8810 8811 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8812 self._match_text_seq("EXTEND") 8813 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8814 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8815 8816 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8817 sample = self._parse_table_sample() 8818 8819 with_ = query.args.get("with") 8820 if with_: 8821 with_.expressions[-1].this.set("sample", sample) 8822 else: 8823 query.set("sample", sample) 8824 8825 return query 8826 8827 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8828 if isinstance(query, exp.Subquery): 8829 query = exp.select("*").from_(query, copy=False) 8830 8831 if not query.args.get("from"): 8832 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8833 8834 while self._match(TokenType.PIPE_GT): 8835 start = self._curr 8836 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8837 if not parser: 8838 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8839 # keywords, making it tricky to disambiguate them without lookahead. The approach 8840 # here is to try and parse a set operation and if that fails, then try to parse a 8841 # join operator. If that fails as well, then the operator is not supported. 
8842 parsed_query = self._parse_pipe_syntax_set_operator(query) 8843 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8844 if not parsed_query: 8845 self._retreat(start) 8846 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8847 break 8848 query = parsed_query 8849 else: 8850 query = parser(self, query) 8851 8852 return query 8853 8854 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8855 vars = self._parse_csv(self._parse_id_var) 8856 if not vars: 8857 return None 8858 8859 return self.expression( 8860 exp.DeclareItem, 8861 this=vars, 8862 kind=self._parse_types(), 8863 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8864 ) 8865 8866 def _parse_declare(self) -> exp.Declare | exp.Command: 8867 start = self._prev 8868 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8869 8870 if not expressions or self._curr: 8871 return self._parse_as_command(start) 8872 8873 return self.expression(exp.Declare, expressions=expressions) 8874 8875 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8876 exp_class = exp.Cast if strict else exp.TryCast 8877 8878 if exp_class == exp.TryCast: 8879 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8880 8881 return self.expression(exp_class, **kwargs) 8882 8883 def _parse_json_value(self) -> exp.JSONValue: 8884 this = self._parse_bitwise() 8885 self._match(TokenType.COMMA) 8886 path = self._parse_bitwise() 8887 8888 returning = self._match(TokenType.RETURNING) and self._parse_type() 8889 8890 return self.expression( 8891 exp.JSONValue, 8892 this=this, 8893 path=self.dialect.to_json_path(path), 8894 returning=returning, 8895 on_condition=self._parse_on_condition(), 8896 ) 8897 8898 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8899 def concat_exprs( 8900 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8901 ) -> exp.Expression: 8902 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8903 concat_exprs = [ 8904 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8905 ] 8906 node.set("expressions", concat_exprs) 8907 return node 8908 if len(exprs) == 1: 8909 return exprs[0] 8910 return self.expression(exp.Concat, expressions=args, safe=True) 8911 8912 args = self._parse_csv(self._parse_lambda) 8913 8914 if args: 8915 order = args[-1] if isinstance(args[-1], exp.Order) else None 8916 8917 if order: 8918 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8919 # remove 'expr' from exp.Order and add it back to args 8920 args[-1] = order.this 8921 order.set("this", concat_exprs(order.this, args)) 8922 8923 this = order or concat_exprs(args[0], args) 8924 else: 8925 this = None 8926 8927 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8928 8929 return self.expression(exp.GroupConcat, this=this, separator=separator)
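None of the helpers above are meant to be called directly; they are reached through the dialect entry points whenever a statement uses the corresponding construct. As a minimal sketch of how _parse_group_concat surfaces (the table and column names are illustrative), MySQL's GROUP_CONCAT with DISTINCT, ORDER BY and SEPARATOR parses into a single exp.GroupConcat node:

    import sqlglot
    from sqlglot import exp

    # Illustrative query; GROUP_CONCAT routes through _parse_group_concat above.
    ast = sqlglot.parse_one(
        "SELECT GROUP_CONCAT(DISTINCT a ORDER BY b SEPARATOR '-') FROM t",
        read="mysql",
    )
    node = ast.find(exp.GroupConcat)
    print(node.args["separator"])  # the parsed separator literal '-'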
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1591 def __init__( 1592 self, 1593 error_level: t.Optional[ErrorLevel] = None, 1594 error_message_context: int = 100, 1595 max_errors: int = 3, 1596 dialect: DialectType = None, 1597 ): 1598 from sqlglot.dialects import Dialect 1599 1600 self.error_level = error_level or ErrorLevel.IMMEDIATE 1601 self.error_message_context = error_message_context 1602 self.max_errors = max_errors 1603 self.dialect = Dialect.get_or_raise(dialect) 1604 self.reset()
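A Parser is usually obtained from a Dialect rather than constructed by hand, but direct construction also works because the constructor resolves the dialect itself. A minimal sketch, assuming the bundled duckdb dialect (any registered dialect name would do):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Dialect.get_or_raise resolves the string to a Dialect instance.
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")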
1617 def parse( 1618 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1619 ) -> t.List[t.Optional[exp.Expression]]: 1620 """ 1621 Parses a list of tokens and returns a list of syntax trees, one tree 1622 per parsed SQL statement. 1623 1624 Args: 1625 raw_tokens: The list of tokens. 1626 sql: The original SQL string, used to produce helpful debug messages. 1627 1628 Returns: 1629 The list of the produced syntax trees. 1630 """ 1631 return self._parse( 1632 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1633 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
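A sketch of the tokenize-then-parse round trip, assuming the bundled duckdb dialect; Dialect.tokenize and Dialect.parser are the usual way to obtain the token list and a matching parser:

    from sqlglot.dialects.dialect import Dialect

    dialect = Dialect.get_or_raise("duckdb")
    sql = "SELECT 1; SELECT 2"
    trees = dialect.parser().parse(dialect.tokenize(sql), sql)
    print(len(trees))  # 2: one syntax tree per statement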
1635 def parse_into( 1636 self, 1637 expression_types: exp.IntoType, 1638 raw_tokens: t.List[Token], 1639 sql: t.Optional[str] = None, 1640 ) -> t.List[t.Optional[exp.Expression]]: 1641 """ 1642 Parses a list of tokens into a given Expression type. If a collection of Expression 1643 types is given instead, this method will try to parse the token list into each one 1644 of them, stopping at the first for which the parsing succeeds. 1645 1646 Args: 1647 expression_types: The expression type(s) to try and parse the token list into. 1648 raw_tokens: The list of tokens. 1649 sql: The original SQL string, used to produce helpful debug messages. 1650 1651 Returns: 1652 The target Expression. 1653 """ 1654 errors = [] 1655 for expression_type in ensure_list(expression_types): 1656 parser = self.EXPRESSION_PARSERS.get(expression_type) 1657 if not parser: 1658 raise TypeError(f"No parser registered for {expression_type}") 1659 1660 try: 1661 return self._parse(parser, raw_tokens, sql) 1662 except ParseError as e: 1663 e.errors[0]["into_expression"] = expression_type 1664 errors.append(e) 1665 1666 raise ParseError( 1667 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1668 errors=merge_errors(errors), 1669 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
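A sketch, assuming exp.Select is registered in EXPRESSION_PARSERS (it is in the base parser); the query string is illustrative:

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect

    dialect = Dialect.get_or_raise("duckdb")
    sql = "SELECT x FROM t"
    tree = dialect.parser().parse_into(exp.Select, dialect.tokenize(sql), sql)[0]
    assert isinstance(tree, exp.Select)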
1709 def check_errors(self) -> None: 1710 """Logs or raises any found errors, depending on the chosen error level setting.""" 1711 if self.error_level == ErrorLevel.WARN: 1712 for error in self.errors: 1713 logger.error(str(error)) 1714 elif self.error_level == ErrorLevel.RAISE and self.errors: 1715 raise ParseError( 1716 concat_messages(self.errors, self.max_errors), 1717 errors=merge_errors(self.errors), 1718 )
Logs or raises any found errors, depending on the chosen error level setting.
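check_errors runs automatically at the end of a parse, so the error level chosen at construction time decides whether malformed SQL logs or raises. A sketch of the WARN behavior with an intentionally incomplete statement:

    from sqlglot.dialects.dialect import Dialect
    from sqlglot.errors import ErrorLevel

    dialect = Dialect.get_or_raise("duckdb")
    parser = dialect.parser(error_level=ErrorLevel.WARN)
    # The missing table name is recorded and logged through the "sqlglot"
    # logger instead of being raised.
    parser.parse(dialect.tokenize("SELECT * FROM"), "SELECT * FROM")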
1720 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1721 """ 1722 Appends an error in the list of recorded errors or raises it, depending on the chosen 1723 error level setting. 1724 """ 1725 token = token or self._curr or self._prev or Token.string("") 1726 start = token.start 1727 end = token.end + 1 1728 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1729 highlight = self.sql[start:end] 1730 end_context = self.sql[end : end + self.error_message_context] 1731 1732 error = ParseError.new( 1733 f"{message}. Line {token.line}, Col: {token.col}.\n" 1734 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1735 description=message, 1736 line=token.line, 1737 col=token.col, 1738 start_context=start_context, 1739 highlight=highlight, 1740 end_context=end_context, 1741 ) 1742 1743 if self.error_level == ErrorLevel.IMMEDIATE: 1744 raise error 1745 1746 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
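Every recorded error carries the structured fields shown above, which can be inspected when a ParseError is caught under the default IMMEDIATE level. A sketch:

    from sqlglot.dialects.dialect import Dialect
    from sqlglot.errors import ParseError

    dialect = Dialect.get_or_raise("duckdb")
    sql = "SELECT * FROM"
    try:
        dialect.parser().parse(dialect.tokenize(sql), sql)
    except ParseError as e:
        err = e.errors[0]  # description, line, col, highlight, contexts
        print(err["line"], err["col"], err["highlight"])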
1748 def expression( 1749 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1750 ) -> E: 1751 """ 1752 Creates a new, validated Expression. 1753 1754 Args: 1755 exp_class: The expression class to instantiate. 1756 comments: An optional list of comments to attach to the expression. 1757 kwargs: The arguments to set for the expression along with their respective values. 1758 1759 Returns: 1760 The target expression. 1761 """ 1762 instance = exp_class(**kwargs) 1763 instance.add_comments(comments) if comments else self._add_comments(instance) 1764 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
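Because expression validates eagerly, a node built through it is checked against its argument spec immediately. A minimal sketch building an exp.Like, whose this and expression arguments are both mandatory since it is a binary expression:

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect

    parser = Dialect.get_or_raise("duckdb").parser()
    like = parser.expression(
        exp.Like, this=exp.column("x"), expression=exp.Literal.string("a%")
    )
    print(like.sql())  # x LIKE 'a%'
    # Omitting the mandatory expression argument would go through raise_error.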
1771 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1772 """ 1773 Validates an Expression, making sure that all its mandatory arguments are set. 1774 1775 Args: 1776 expression: The expression to validate. 1777 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1778 1779 Returns: 1780 The validated expression. 1781 """ 1782 if self.error_level != ErrorLevel.IGNORE: 1783 for error_message in expression.error_messages(args): 1784 self.raise_error(error_message) 1785 1786 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that were used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
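With ErrorLevel.IGNORE the validation loop above is skipped entirely, so incomplete expressions pass through unchecked; a sketch:

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect
    from sqlglot.errors import ErrorLevel

    parser = Dialect.get_or_raise("duckdb").parser(error_level=ErrorLevel.IGNORE)
    incomplete = parser.expression(exp.Like, this=exp.column("x"))
    print(incomplete.args.get("expression"))  # None: missing arg, but no error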
4876 def parse_set_operation( 4877 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4878 ) -> t.Optional[exp.Expression]: 4879 start = self._index 4880 _, side_token, kind_token = self._parse_join_parts() 4881 4882 side = side_token.text if side_token else None 4883 kind = kind_token.text if kind_token else None 4884 4885 if not self._match_set(self.SET_OPERATIONS): 4886 self._retreat(start) 4887 return None 4888 4889 token_type = self._prev.token_type 4890 4891 if token_type == TokenType.UNION: 4892 operation: t.Type[exp.SetOperation] = exp.Union 4893 elif token_type == TokenType.EXCEPT: 4894 operation = exp.Except 4895 else: 4896 operation = exp.Intersect 4897 4898 comments = self._prev.comments 4899 4900 if self._match(TokenType.DISTINCT): 4901 distinct: t.Optional[bool] = True 4902 elif self._match(TokenType.ALL): 4903 distinct = False 4904 else: 4905 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4906 if distinct is None: 4907 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4908 4909 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4910 "STRICT", "CORRESPONDING" 4911 ) 4912 if self._match_text_seq("CORRESPONDING"): 4913 by_name = True 4914 if not side and not kind: 4915 kind = "INNER" 4916 4917 on_column_list = None 4918 if by_name and self._match_texts(("ON", "BY")): 4919 on_column_list = self._parse_wrapped_csv(self._parse_column) 4920 4921 expression = self._parse_select( 4922 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4923 ) 4924 4925 return self.expression( 4926 operation, 4927 comments=comments, 4928 this=this, 4929 distinct=distinct, 4930 by_name=by_name, 4931 expression=expression, 4932 side=side, 4933 kind=kind, 4934 on=on_column_list, 4935 )
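The effect of parse_set_operation is easiest to observe through the high-level API; a sketch showing that UNION ALL yields an exp.Union with distinct set to False:

    import sqlglot
    from sqlglot import exp

    union = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
    assert isinstance(union, exp.Union)
    print(union.args["distinct"])  # False, because ALL was given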