@@ -545,6 +545,9 @@ const keywords = [
545545 Keyword (Float64, " x" , x , operators),
546546 Keyword (Float64, " y" , y , operators),
547547 Keyword (Float64, " z" , z , operators),
548+ ]
549+
550+ const macro_keywords = [
548551 MacroKeyword (" standard" , standardselector),
549552 MacroKeyword (" hetero" , heteroselector),
550553 MacroKeyword (" backbone" , backboneselector),
@@ -569,37 +572,220 @@ const keywords = [
569572 MacroKeyword (" all" , allselector),
570573]
571574
572- # See https://discourse.julialang.org/t/parsing-selection-syntax/43632/9
573- parse_query (selection:: AbstractString ) = parse_query_vector (split (selection))
574-
575- function parse_query_vector (s:: AbstractVector{<:AbstractString} )
576- # or, and, not
577- if (i = findfirst (== (" or" ), s)) != = nothing
578- deleteat! (s, i)
579- return (| , parse_query_vector .((s[1 : i- 1 ], s[i: end ]))... )
580- elseif (i = findfirst (== (" and" ), s)) != = nothing
581- deleteat! (s, i)
582- return (& , parse_query_vector .((s[1 : i- 1 ], s[i: end ]))... )
583- elseif (i = findfirst (== (" not" ), s)) != = nothing
584- deleteat! (s, i)
585- return (! , parse_query_vector (s[i: end ]))
586- # Keywords
587- else
588- for key in keywords
589- if (i = findfirst (== (key. name), s)) != = nothing
590- deleteat! (s, i)
591- return key (s)
592- end
593- end
594- throw (ArgumentError ((" Unable to parse selection string: $s " )))
595- end
#=
    parse_query(selection::AbstractString)

Tokenizes `selection` and calls `parse_query_vector` on the resulting tokens.

Every "(" and ")" is padded with spaces before splitting, so that parentheses
become separate tokens even when written next to a word, e.g. "(name CA)".

Any `AbstractString` is accepted (for instance a `SubString`), not only `String`.

=#
function parse_query(selection::AbstractString)
    s = replace(selection, "(" => " ( ", ")" => " ) ")
    return parse_query_vector(split(s))
end
597585
# Evaluate the parsed query `q` on `a`.
#
# - If `q` is not a `Tuple`, it is called directly as `q(a)`.
# - If `q` is a `Tuple`, its first element is a function and the remaining
#   elements are sub-queries: each sub-query is evaluated recursively on `a`
#   and the function is applied to the results.
function apply_query(q, a)
    q isa Tuple || return q(a)
    op, operands = Iterators.peel(q)
    evaluated = [apply_query(operand, a) for operand in operands]
    return op(evaluated...)
end
594+
# Abort parsing by throwing an `ArgumentError` whose message is `str`.
function parse_error(str)
    throw(ArgumentError(str))
end
596+
597+ #
598+ # Note: the following code was generated by Gemini 2.5-Pro, with modifications,
599+ # and then tested.
600+ #
601+
602+ # New helper functions
# Return `true` when `token` is exactly one of the logical operator words
# "and", "or" or "not" (the comparison is case-sensitive).
function is_operator(token::AbstractString)
    return token in ("and", "or", "not")
end
606+
# Return `true` when `tokens` starts with "(" and ends with ")" and those two
# parentheses match each other, i.e. the whole token list is wrapped by a
# single outer pair.
#
# Examples:
#   ["(", "a", "and", "b", ")"]            -> true
#   ["(", "a", ")", "or", "(", "b", ")"]   -> false (first "(" closes early)
#   ["a", "(", "b"]                        -> false (does not start with "(")
function is_fully_enclosed(tokens::AbstractVector{<:AbstractString})
    # The wrapping pair itself must be present; checked here so the function
    # gives a correct answer even when the caller did not pre-check the ends.
    if length(tokens) < 2 || first(tokens) != "(" || last(tokens) != ")"
        return false
    end
    depth = 0
    # Scan every token except the last: if the nesting depth returns to zero
    # (or below) before the final token, the opening parenthesis was closed
    # early and therefore is not matched by the last token.
    for i in firstindex(tokens):(lastindex(tokens) - 1)
        tok = tokens[i]
        if tok == "("
            depth += 1
        elseif tok == ")"
            depth -= 1
            depth <= 0 && return false
        end
    end
    # Exactly one parenthesis must still be open, to be closed by the last token.
    return depth == 1
end
625+
# Locate the first token equal to `op_str` that is not inside any parentheses.
#
# Returns the index of that token, or 0 when no such token exists.
# Calls `parse_error` when a ")" appears without a matching "(" before the
# operator is found, or when the scan reaches the end of `tokens` with
# unclosed "(" (the latter is only checked if no operator was returned).
function find_operator_at_level_zero(op_str::String, tokens::AbstractVector{<:AbstractString})
    depth = 0
    # Left-to-right scan: the leftmost top-level occurrence wins.
    for (idx, tok) in pairs(tokens)
        if tok == "("
            depth += 1
        elseif tok == ")"
            depth -= 1
            depth < 0 && parse_error("Mismatched parentheses: too many closing parentheses.")
        elseif depth == 0 && tok == op_str
            return idx
        end
    end
    depth == 0 || parse_error("Mismatched parentheses: not enough closing parentheses.")
    return 0 # Not found at level zero
end
646+
# Parse the arguments of a standard keyword phrase.
#
# `s_vec` is the full phrase: `s_vec[begin]` is the keyword name (already matched
# against `key_obj.name`) and the remaining tokens are its arguments.
#
# Accepted forms:
#   "keyword operator value"   -> `key_obj(["operator", "value"])`
#   "keyword value"            -> `key_obj(["value"])`
#   "keyword v1 v2 ... vn"     -> right-nested `(|, key_obj([v1]), (|, ..., key_obj([vn])))`
#
# Any other arrangement calls `parse_error`.
function _parse_keyword_phrase(key_obj, s_vec::AbstractVector{<:AbstractString})
    if length(s_vec) == 1 # Keyword name token only, no arguments
        parse_error("Keyword '$(key_obj.name)' requires at least one argument.")
    end
    keyword_args = s_vec[begin+1:end]
    first_arg = keyword_args[begin]

    # Explicit comparison: the first argument is one of the keyword's operator
    # strings (first element of each entry of `key_obj.operators`). Exactly one
    # value must follow it.
    for op_tuple in key_obj.operators
        operator_string = op_tuple[1]
        if first_arg == operator_string
            length(keyword_args) == 2 && return key_obj(keyword_args)
            parse_error(
                "Malformed operator expression for keyword '$(key_obj.name)'. " *
                "Expected 'keyword $operator_string value'. Got: $(join(s_vec, " "))"
            )
        end
    end

    # Implicit equality: every argument must be a plain value.
    for arg_val in keyword_args
        # A comparison operator anywhere but the first position is an error,
        # e.g. "resnum 10 < 20".
        for op_tuple in key_obj.operators
            if arg_val == op_tuple[1]
                parse_error(
                    "Syntax error for keyword '$(key_obj.name)'. Operator '$(op_tuple[1])' found in an unexpected position. " *
                    "Arguments: $(join(keyword_args, " ")). Operator expressions must be 'keyword $(op_tuple[1]) value'."
                )
            end
        end
        # Logical operators and parentheses reaching this point are misplaced
        # (e.g. "name CA not resname ALA"); without this check they would be
        # silently compared against as if they were values.
        if is_operator(arg_val) || arg_val == "(" || arg_val == ")"
            parse_error(
                "Syntax error for keyword '$(key_obj.name)'. Unexpected token '$arg_val' in argument list: " *
                "$(join(keyword_args, " ")). Logical operators and parentheses cannot appear among a keyword's values."
            )
        end
    end

    # Single value, e.g. "name CA".
    length(keyword_args) == 1 && return key_obj(keyword_args)

    # Several values, e.g. "resname ARG GLU ASP": build a right-nested OR chain,
    # starting from the last value and prepending the others in reverse order.
    current_expr_tree = key_obj([keyword_args[end]])
    for k_idx in (lastindex(keyword_args)-1):-1:firstindex(keyword_args)
        current_expr_tree = (|, key_obj([keyword_args[k_idx]]), current_expr_tree)
    end
    return current_expr_tree
end

# Recursively parse a vector of query tokens into an expression tree.
#
# The result is either the value returned by calling a keyword object on its
# arguments, or a tuple `(op, subtree...)` with `op` one of `|`, `&`, `!`.
#
# Parsing steps, in order:
#   1. Strip parentheses that wrap the whole token list.
#   2. Split at the first top-level "or"; otherwise at the first top-level "and".
#   3. A leading "not" negates the rest of the tokens.
#   4. Otherwise the first token must name an entry of `keywords` (with
#      arguments) or of `macro_keywords` (without arguments).
#
# The input vector is not modified (only slices, which are copies, are used).
# Every syntax problem is reported through `parse_error`.
function parse_query_vector(s_vec_const::AbstractVector{<:AbstractString})
    if isempty(s_vec_const)
        parse_error("Empty query segment.")
    end

    # "(A and B)" is parsed as "A and B"; repeated for nested wrapping pairs.
    s_vec = s_vec_const
    while length(s_vec) > 1 && s_vec[begin] == "(" && s_vec[end] == ")" && is_fully_enclosed(s_vec)
        s_vec = s_vec[begin+1:end-1]
        if isempty(s_vec)
            parse_error("Empty parentheses in query: '()'")
        end
    end

    # Binary operators, split in this order: "or" first, then "and".
    for (op_name, op_func) in (("or", |), ("and", &))
        i = find_operator_at_level_zero(op_name, s_vec)
        if i > 0
            left_tokens = s_vec[begin:i-1]
            right_tokens = s_vec[i+1:end]
            if isempty(left_tokens) || isempty(right_tokens)
                parse_error("Syntax error near '$op_name'. Missing operand.")
            end
            return (op_func, parse_query_vector(left_tokens), parse_query_vector(right_tokens))
        end
    end

    # Unary "not". A following "and"/"or" cannot occur here: it would be at
    # parenthesis level zero and the split above would already have handled it.
    if s_vec[begin] == "not"
        if length(s_vec) == 1
            parse_error("Syntax error near 'not'. Missing operand.")
        end
        return (!, parse_query_vector(s_vec[begin+1:end]))
    end

    # Base case: no top-level logical operators. Must be a keyword phrase.
    token_keyword_name = s_vec[begin]

    # Standard keywords (take arguments).
    for key_obj in keywords
        if token_keyword_name == key_obj.name
            return _parse_keyword_phrase(key_obj, s_vec)
        end
    end

    # Macro keywords (take no arguments; called with an empty argument list).
    for key_obj in macro_keywords
        if token_keyword_name == key_obj.name
            if length(s_vec) > 1
                parse_error("Macro keyword '$(key_obj.name)' does not take arguments. Unexpected tokens: $(join(s_vec[begin+1:end], " "))")
            end
            return key_obj(String[])
        end
    end

    parse_error("Unknown keyword or invalid syntax at: '$(join(s_vec, " "))'")
end
0 commit comments