Skip to content

Commit 1e7747a

Browse files
committed
Fixed XMLDecompress
1 parent 7e6d128 commit 1e7747a

4 files changed

Lines changed: 73 additions & 24 deletions

File tree

samples/decompressed.xml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<users>
2+
<user>
3+
<id>1</id>
4+
<name>user1</name>
5+
<posts>
6+
<post>
7+
Lorem ipsum dolor sit amet.
8+
</post>
9+
</posts>
10+
<followers>
11+
<follower>
12+
<id>2</id>
13+
</follower>
14+
</followers>
15+
</user>
16+
</users>
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<users>
2+
<user>
3+
<id>1</>
4+
<name>user1</>
5+
<posts>
6+
<post>
7+
Lorem ipsum dolor sit amet.
8+
</>
9+
</>
10+
<followers>
11+
<follower>
12+
<id>2</>
13+
</>
14+
</>
15+
</>
16+
</>

src/modules/xml_compressor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@ def compress(self, output_path):
1313
file.write(compressed_data)
1414

1515
# Usage
# Guarded so that importing this module does not trigger file I/O; the
# example only runs when the file is executed directly as a script.
if __name__ == "__main__":
    compressor = XMLCompressor('../../samples/large_sample.xml')
    compressor.compress('../../samples/output.compressed')
1818

src/modules/xml_decompressor.py

Lines changed: 39 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,50 @@
1-
import re
21
class XMLDecompressor:
    """Restore XML whose closing tags were compressed to the short form ``</>``.

    The input is scanned line by line while a stack of currently open
    element names is maintained; every closing tag is rewritten as
    ``</name>`` using the most recently opened element.

    Limitation: markup inside comments or CDATA sections is not recognised
    as such, so a ``<tag>`` written inside a comment body is still treated
    as an opening tag.
    """

    def __init__(self, input_path):
        self.input_path = input_path

    def extract_tag(self, line, start):
        """Return the tag name that begins at ``line[start]``.

        Characters are collected until whitespace or ``>`` is reached; any
        ``<`` encountered is skipped.  A closing tag keeps its leading
        ``/`` (``"</id>"`` yields ``"/id"`` and ``"</>"`` yields ``"/"``).

        Returns ``-1`` when a ``?`` or ``!`` is met first, i.e. for XML
        declarations, processing instructions, comments and doctypes.
        """
        chars = []
        for char in line[start:]:
            # Any whitespace ends the name, so a tag whose attributes start
            # after a tab or on the next line does not absorb that character.
            if char == '>' or char.isspace():
                break
            if char == '<':
                continue
            if char == '?' or char == '!':
                return -1
            chars.append(char)
        return ''.join(chars)

    def _expand_line(self, line, open_tags):
        """Return ``line`` with every closing tag expanded to its full name.

        ``open_tags`` is the shared stack of open element names; it is
        updated in place so state carries over from one line to the next.

        Raises ``ValueError`` when a closing tag has no matching opener.
        """
        pieces = []
        copied_upto = 0  # index in ``line`` up to which text has been emitted

        for pos, char in enumerate(line):
            if char != '<':
                continue

            tag = self.extract_tag(line, pos)
            # Skip declarations/comments (-1) and empty tags such as "<>".
            if tag == -1 or not tag:
                continue

            if tag[0] != '/':
                close = line.find('>', pos)
                # "<br/>" or "<br />" opens and closes itself, so it must
                # not stay on the stack waiting for a closer.
                if not (close > pos and line[close - 1] == '/'):
                    open_tags.append(tag)
                continue

            if not open_tags:
                raise ValueError(f"Missing opening tag for {tag[1:]}")

            tag_name = open_tags.pop()
            # Emit untouched text up to the closer, then the full closing
            # tag.  The output is built from slices of the *original* line,
            # so earlier replacements never shift the offsets of later ones.
            pieces.append(line[copied_upto:pos])
            pieces.append(f'</{tag_name}>')
            close = line.find('>', pos + len(tag))
            # Consume through '>' when it is on this line; otherwise only
            # the "<" + tag characters that were actually seen.
            copied_upto = close + 1 if close != -1 else pos + len(tag) + 1

        pieces.append(line[copied_upto:])
        return ''.join(pieces)

    def decompress(self, output_path):
        """Read ``self.input_path``, expand closing tags, write ``output_path``.

        Raises ``ValueError`` when a closing tag appears while no element
        is open.
        """
        with open(self.input_path, 'r', encoding='utf-8') as file:
            lines = file.readlines()

        open_tags = []
        restored = [self._expand_line(line, open_tags) for line in lines]

        # Write decompressed data to output file
        with open(output_path, 'w', encoding='utf-8') as file:
            file.writelines(restored)
3047

3148
# Usage
# Guarded so that importing this module does not read or write any files;
# the example only runs when the file is executed directly as a script.
if __name__ == "__main__":
    decompressor = XMLDecompressor(r'../samples/compressed.xml')
    decompressor.decompress(r'../samples/decompressed.xml')

0 commit comments

Comments
 (0)