Skip to content

Commit 952f45b

Browse files
Change search_word and search_topic in the PostSearch class
1 parent bb9ab4f commit 952f45b

1 file changed

Lines changed: 93 additions & 44 deletions

File tree

src/postsearch/post_search.py

Lines changed: 93 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,66 +1,115 @@
11
class PostSearch:
    """Search the posts of an XML document using plain string scanning.

    The document is read as raw text and scanned for ``<user>`` elements,
    each of which is scanned for ``<post>`` elements; ``<topic>`` and
    ``<body>`` values are then pulled out of every post. Only tags written
    exactly as ``<name>`` (no attributes, no extra whitespace) are
    recognised, and posts located outside a ``<user>`` element are ignored.
    """

    def __init__(self, xml_file):
        """Initialize the PostSearch object by loading the XML file as a string.

        Args:
            xml_file: Path of the UTF-8 encoded XML file to read.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(xml_file, 'r', encoding='utf-8') as file:
            self.xml_content = file.read()

    def search_word(self, word):
        """Return the bodies of posts whose <body> contains ``word``.

        The comparison is a case-insensitive substring match.

        Args:
            word: Text to look for inside each post body.

        Returns:
            A list with the whitespace-stripped body of every matching post,
            in document order. Posts with a missing or empty <body> never
            match.
        """
        word_lower = word.lower()
        posts = []
        for post_content in self._iter_posts():
            body_content = self._extract_tag_value(post_content, "body")
            if body_content and word_lower in body_content.lower():
                posts.append(body_content.strip())
        return posts

    def search_topic(self, topic):
        """Return the bodies of posts whose <topic> equals ``topic``.

        The comparison is case-insensitive and must match the whole
        <topic> value.

        Args:
            topic: Topic name to look for.

        Returns:
            A list with the whitespace-stripped body of every matching post,
            in document order. Matching posts with a missing or empty <body>
            are skipped.
        """
        topic_lower = topic.lower()
        posts = []
        for post_content in self._iter_posts():
            topic_content = self._extract_tag_value(post_content, "topic")
            if topic_content and topic_content.lower() == topic_lower:
                body_content = self._extract_tag_value(post_content, "body")
                if body_content:
                    posts.append(body_content.strip())
        return posts

    def _iter_posts(self):
        """Yield the inner text of every <post> found inside a <user>."""
        for user_content in self._iter_elements(self.xml_content, "user"):
            yield from self._iter_elements(user_content, "post")

    @staticmethod
    def _iter_elements(content, tag):
        """Yield the text between each ``<tag>`` and its next ``</tag>``.

        Scanning stops at the first element whose closing tag is missing.
        Without that guard ``str.find`` returns -1, the cursor would be reset
        to the start of ``content`` and the same element would be found again
        forever.
        """
        open_tag = f"<{tag}>"
        close_tag = f"</{tag}>"
        cursor = 0
        while True:
            start = content.find(open_tag, cursor)
            if start == -1:
                return
            start += len(open_tag)
            end = content.find(close_tag, start)
            if end == -1:
                # Unterminated element: nothing reliable left to scan.
                return
            yield content[start:end]
            cursor = end + len(close_tag)

    def _extract_tag_value(self, content, tag):
        """Extract the value of the first ``<tag>`` element in ``content``.

        Args:
            content: Text to search.
            tag: Tag name without angle brackets.

        Returns:
            The text between the first ``<tag>`` and the following
            ``</tag>``, or ``None`` if either tag is missing.
        """
        return next(self._iter_elements(content, tag), None)
92+
93+
5494

5595
# Example usage
# Location of the sample XML document; adjust it if the file lives elsewhere.
xml_file_path = r"samples/test.xml"  # forward slashes, so no escaping is needed

# Load the document once; the searcher keeps its full text in memory.
searcher = PostSearch(xml_file_path)

# The sample queries below are disabled by keeping them inside a string literal.
"""
# Search for posts containing a specific word (case-insensitive)
word = "EXercitation "
word_results = searcher.search_word(word)
print(f"Posts containing the word '{word}':")
for post_body in word_results:
    print(post_body)

# Search for posts with a specific topic (case-insensitive)
topic = "tOPiC1"
topic_results = searcher.search_topic(topic)
print(f"\nPosts with topic '{topic}':")
for post_body in topic_results:
    print(post_body)
"""

0 commit comments

Comments
 (0)