class PostSearch:
    """Search forum-style posts stored in an XML document.

    The document is scanned with plain substring matching rather than an XML
    parser. Posts are expected to be laid out as::

        <user> ... <post><topic>..</topic><body>..</body></post> ... </user>

    Tags are matched literally, so elements carrying attributes
    (e.g. ``<post id="1">``) are not recognised.
    """

    def __init__(self, xml_file):
        """Load the whole XML file into memory as a string.

        Args:
            xml_file: Path of the UTF-8 encoded XML file to read.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(xml_file, 'r', encoding='utf-8') as file:
            self.xml_content = file.read()

    def search_word(self, word: str) -> list:
        """Return the bodies of posts whose <body> contains ``word``.

        The comparison is case-insensitive. Posts without a <body>, or with
        an empty one, are skipped.

        Args:
            word: Text to look for inside each post body.

        Returns:
            A list of matching body strings with surrounding whitespace
            stripped, in document order.
        """
        word_lower = word.lower()
        posts = []
        for post_content in self._iter_posts():
            body_content = self._extract_tag_value(post_content, "body")
            if body_content and word_lower in body_content.lower():
                posts.append(body_content.strip())
        return posts

    def search_topic(self, topic: str) -> list:
        """Return the bodies of posts whose <topic> equals ``topic``.

        The comparison is case-insensitive and exact (the topic text is not
        stripped before comparing). Matching posts without a non-empty <body>
        are skipped.

        Args:
            topic: Topic name to match against each post's <topic> value.

        Returns:
            A list of matching body strings with surrounding whitespace
            stripped, in document order.
        """
        topic_lower = topic.lower()
        posts = []
        for post_content in self._iter_posts():
            topic_content = self._extract_tag_value(post_content, "topic")
            if not topic_content or topic_content.lower() != topic_lower:
                continue
            body_content = self._extract_tag_value(post_content, "body")
            if body_content:
                posts.append(body_content.strip())
        return posts

    def _iter_posts(self):
        """Yield the raw text of every complete <post> nested in a <user>."""
        for user_content in self._iter_elements(self.xml_content, "user"):
            yield from self._iter_elements(user_content, "post")

    @staticmethod
    def _iter_elements(content, tag):
        """Yield each ``<tag>...`` span found in ``content``, in order.

        Every yielded string starts at the opening tag and stops just before
        the matching closing tag.
        """
        open_tag = f"<{tag}>"
        close_tag = f"</{tag}>"
        cursor = 0
        while True:
            start = content.find(open_tag, cursor)
            if start == -1:
                return
            end = content.find(close_tag, start)
            if end == -1:
                # Unterminated element: stop here. Advancing the cursor from
                # a -1 result would rewind the scan and loop forever.
                return
            yield content[start:end]
            cursor = end + len(close_tag)

    def _extract_tag_value(self, content, tag):
        """Return the text between the first ``<tag>`` and its ``</tag>``.

        Args:
            content: Text to search in.
            tag: Tag name without angle brackets.

        Returns:
            The enclosed text (not stripped), or ``None`` when either the
            opening or the closing tag is missing.
        """
        start_tag = f"<{tag}>"
        end_tag = f"</{tag}>"
        start = content.find(start_tag)
        if start == -1:
            return None
        start += len(start_tag)
        end = content.find(end_tag, start)
        if end == -1:
            return None
        return content[start:end]
92+
93+
5494
# Example Usage
# Ensure the XML file path is correct
# NOTE: this runs at import time; PostSearch opens the file immediately, so
# the path below must exist relative to the current working directory.
xml_file_path = r"samples/test.xml"  # Use raw string or forward slashes

searcher = PostSearch(xml_file_path)

"""
# Search for posts containing a specific word (case-insensitive)
word = "EXercitation"
word_results = searcher.search_word(word)
print(f"Posts containing the word '{word}':")
for post_body in word_results:
    print(post_body)

# Search for posts with a specific topic (case-insensitive)
topic = "tOPiC1"
topic_results = searcher.search_topic(topic)
print(f"\nPosts with topic '{topic}':")
for post_body in topic_results:
    print(post_body)
"""
0 commit comments