@@ -515,12 +515,9 @@ function parse_to_type(key::Keyword, val)
515515 elseif key. value_type == Char && length (val) == 1
516516 return val[1 ]
517517 end
518- try
519- val = parse (key. value_type, val)
520- return val
521- catch
522- throw (ArgumentError (" Could not parse $val for keyword $(key. name) , expected $(key. value_type) " ))
523- end
518+ val = tryparse (key. value_type, val)
519+ isnothing (val) && throw (ArgumentError (" Could not parse $val for keyword $(key. name) , expected $(key. value_type) " ))
520+ return val
524521end
525522
526523const keywords = [
@@ -545,6 +542,9 @@ const keywords = [
545542 Keyword (Float64, " x" , x , operators),
546543 Keyword (Float64, " y" , y , operators),
547544 Keyword (Float64, " z" , z , operators),
545+ ]
546+
547+ const macro_keywords = [
548548 MacroKeyword (" standard" , standardselector),
549549 MacroKeyword (" hetero" , heteroselector),
550550 MacroKeyword (" backbone" , backboneselector),
@@ -569,37 +569,209 @@ const keywords = [
569569 MacroKeyword (" all" , allselector),
570570]
571571
572- # See https://discourse.julialang.org/t/parsing-selection-syntax/43632/9
573- parse_query (selection:: AbstractString ) = parse_query_vector (split (selection))
574-
575- function parse_query_vector (s:: AbstractVector{<:AbstractString} )
576- # or, and, not
577- if (i = findfirst (== (" or" ), s)) != = nothing
578- deleteat! (s, i)
579- return (| , parse_query_vector .((s[1 : i- 1 ], s[i: end ]))... )
580- elseif (i = findfirst (== (" and" ), s)) != = nothing
581- deleteat! (s, i)
582- return (& , parse_query_vector .((s[1 : i- 1 ], s[i: end ]))... )
583- elseif (i = findfirst (== (" not" ), s)) != = nothing
584- deleteat! (s, i)
585- return (! , parse_query_vector (s[i: end ]))
586- # Keywords
587- else
588- for key in keywords
589- if (i = findfirst (== (key. name), s)) != = nothing
590- deleteat! (s, i)
591- return key (s)
592- end
593- end
594- throw (ArgumentError ((" Unable to parse selection string: $s " )))
595- end
"""
    parse_query(selection::AbstractString)

Tokenize the selection string and parse it with `parse_query_vector`.

Parentheses are padded with spaces before splitting on whitespace, so that
`"(name CA)"` and `"( name CA )"` produce the same tokens.

Any `AbstractString` is accepted (e.g. a `SubString` obtained from `split` or
`strip`), not only `String`.
"""
function parse_query(selection::AbstractString)
    # Make every parenthesis a standalone token for the whitespace split below.
    padded = replace(selection, "(" => " ( ", ")" => " ) ")
    return parse_query_vector(split(padded))
end
597582
"""
    apply_query(q, a)

Evaluate the parsed query `q` on `a`.

A query is either a callable leaf, evaluated as `q(a)`, or a tuple
`(f, operands...)` whose operands are themselves queries: every operand is
evaluated recursively on `a` and the results are passed to `f`.

# Throws
- `ArgumentError` if `q` is an empty tuple (there is no operator to apply).
"""
function apply_query(q, a)
    # Leaf: any non-tuple is treated as a callable selector.
    q isa Tuple || return q(a)
    if isempty(q)
        throw(ArgumentError("Empty query expression: expected (operator, operands...)."))
    end
    f = first(q)
    # Mapping over the tuple tail keeps the operands in a tuple (no intermediate Vector).
    operands = map(arg -> apply_query(arg, a), Base.tail(q))
    return f(operands...)
end
591+
"""
    parse_error(str)

Throw an `ArgumentError` whose message is `str`. This function never returns.
"""
function parse_error(str)
    throw(ArgumentError(str))
end
593+
594+ #
595+ # Obs: the following code was generated by Gemini 2.5-Pro, with modifications,
596+ # and then tested.
597+ #
598+
599+ # New helper functions
"""
    is_operator(token::AbstractString)

Return `true` if `token` is one of the logical operator words `"and"`, `"or"`
or `"not"`, and `false` otherwise.
"""
is_operator(token::AbstractString) = token in ("and", "or", "not")
603+
"""
    is_fully_enclosed(tokens::AbstractVector{<:AbstractString})

Return `true` if the first token is `"("`, the last token is `")"`, and that
opening parenthesis is closed only by the last token, i.e. the whole sequence
is wrapped in a single outer pair of parentheses.

Sequences shorter than two tokens, or that do not start with `"("` and end with
`")"`, are never considered enclosed.

For example, `( a and b )` is enclosed, while `( a ) or ( b )` is not: its first
parenthesis closes before the end.
"""
function is_fully_enclosed(tokens::AbstractVector{<:AbstractString})
    length(tokens) >= 2 || return false
    (first(tokens) == "(" && last(tokens) == ")") || return false
    depth = 0
    # Scan everything except the final token: if the depth returns to zero here,
    # the opening parenthesis was closed before the end.
    for i in firstindex(tokens):(lastindex(tokens) - 1)
        tok = tokens[i]
        if tok == "("
            depth += 1
        elseif tok == ")"
            depth -= 1
            depth == 0 && return false
        end
    end
    # Exactly one level must remain open for the final ")" to close.
    return depth == 1
end
622+
"""
    find_operator_at_level_zero(op_str::String, tokens::AbstractVector{<:AbstractString})

Return the index of the first token equal to `op_str` that is not inside any
parentheses, scanning from left to right, or `0` if there is none.

# Throws
- `ArgumentError` if a `")"` appears with no matching `"("` before it.
- `ArgumentError` if the scan reaches the end with parentheses still open.
"""
function find_operator_at_level_zero(
    op_str::String, tokens::AbstractVector{<:AbstractString}
)
    depth = 0
    for (idx, tok) in pairs(tokens)
        if tok == "("
            depth += 1
        elseif tok == ")"
            depth -= 1
            if depth < 0
                throw(
                    ArgumentError("Mismatched parentheses: too many closing parentheses.")
                )
            end
        elseif depth == 0 && tok == op_str
            return idx
        end
    end
    if depth != 0
        throw(ArgumentError("Mismatched parentheses: not enough closing parentheses."))
    end
    return 0 # no occurrence outside parentheses
end
643+
644+ # Modified parse_query_vector
"""
    parse_query_vector(s_vec_const::AbstractVector{<:AbstractString})

Parse a tokenized selection into a query: either the value returned by calling
a keyword object, or a nested tuple `(op, operands...)` with `op` one of `|`,
`&`, `!`.

Parsing steps, in order:
1. Strip parentheses that enclose the whole token sequence.
2. Split at the first `or` found outside parentheses; otherwise at the first
   `and` found outside parentheses.
3. Otherwise, a leading `not` negates the rest of the sequence.
4. Otherwise the sequence must be a keyword phrase: `keyword value...`,
   `keyword operator value`, or a macro keyword taking no arguments.

The input vector is not modified.

# Throws
- `ArgumentError` (through `parse_error`) on empty input, empty or mismatched
  parentheses, missing operands, malformed keyword phrases and unknown keywords.
"""
function parse_query_vector(s_vec_const::AbstractVector{<:AbstractString})
    isempty(s_vec_const) && parse_error("Empty query segment.")
    tokens = _strip_enclosing_parentheses(s_vec_const)

    # Split "or" before "and": the operator split first ends up highest in the tree.
    for (op_name, op) in (("or", |), ("and", &))
        i = find_operator_at_level_zero(op_name, tokens)
        i > 0 || continue
        left_tokens = tokens[begin:i-1]
        right_tokens = tokens[i+1:end]
        if isempty(left_tokens) || isempty(right_tokens)
            parse_error("Syntax error near '$op_name'. Missing operand.")
        end
        return (op, parse_query_vector(left_tokens), parse_query_vector(right_tokens))
    end

    if tokens[begin] == "not"
        length(tokens) == 1 && parse_error("Syntax error near 'not'. Missing operand.")
        # A top-level "and"/"or" right after "not" was already consumed by the
        # split above, so whatever follows is an operand (possibly another "not").
        return (!, parse_query_vector(tokens[begin+1:end]))
    end

    return _parse_keyword_phrase(tokens)
end

# Remove every pair of parentheses that wraps the whole token sequence.
function _strip_enclosing_parentheses(tokens::AbstractVector{<:AbstractString})
    while length(tokens) > 1 && tokens[begin] == "(" && tokens[end] == ")" &&
          is_fully_enclosed(tokens)
        tokens = tokens[begin+1:end-1]
        isempty(tokens) && parse_error("Empty parentheses in query: '()'")
    end
    return tokens
end

# Parse a token sequence without top-level logical operators: a keyword phrase.
function _parse_keyword_phrase(tokens::AbstractVector{<:AbstractString})
    name = tokens[begin]

    # Standard keywords (keyword followed by values or by "operator value").
    key_idx = findfirst(key -> key.name == name, keywords)
    if key_idx !== nothing
        return _keyword_expression(keywords[key_idx], tokens)
    end

    # Macro keywords stand alone and take no arguments.
    macro_idx = findfirst(key -> key.name == name, macro_keywords)
    if macro_idx !== nothing
        macro_key = macro_keywords[macro_idx]
        if length(tokens) > 1
            parse_error(
                "Macro keyword '$(macro_key.name)' does not take arguments. " *
                "Unexpected tokens: $(join(tokens[begin+1:end], " "))"
            )
        end
        return macro_key(String[])
    end

    return parse_error("Unknown keyword or invalid syntax at: '$(join(tokens, " "))'")
end

# Build the query for `key` from `tokens` (the keyword name followed by its arguments).
function _keyword_expression(key, tokens::AbstractVector{<:AbstractString})
    if length(tokens) == 1
        parse_error("Keyword '$(key.name)' requires at least one argument.")
    end
    args = tokens[begin+1:end]
    # The first element of each entry of `key.operators` is the operator's text.
    operator_names = [op[1] for op in key.operators]

    # "keyword operator value": exactly two arguments, the first being an operator.
    if args[begin] in operator_names
        if length(args) != 2
            parse_error(
                "Malformed operator expression for keyword '$(key.name)'. " *
                "Expected 'keyword $(args[begin]) value'. Got: $(join(tokens, " "))"
            )
        end
        return key(args)
    end

    # Plain value list: an operator anywhere in it is a syntax error.
    misplaced = findfirst(in(operator_names), args)
    if misplaced !== nothing
        op_name = args[misplaced]
        parse_error(
            "Syntax error for keyword '$(key.name)'. Operator '$op_name' found in an unexpected position. " *
            "Arguments: $(join(args, " ")). Operator expressions must be 'keyword $op_name value'."
        )
    end

    # Implicit equality; several values become a right-nested chain of "or":
    # `key v1 v2 v3` -> (|, key(v1), (|, key(v2), key(v3))).
    tree = key([args[end]])
    for value in Iterators.reverse(args[begin:end-1])
        tree = (|, key([value]), tree)
    end
    return tree
end
0 commit comments