|
1 | | -import re |
class XMLDecompressor:
    """Expand an XML file whose closing tags were abbreviated (e.g. to ``</>``).

    Every closing tag found in the input is rewritten as ``</name>``, where
    ``name`` is the most recently opened element that is still unclosed.

    NOTE: the scan is line based and purely textual. Markup hidden inside
    comments or CDATA sections, and tags whose ``>`` sits on a later line than
    their ``<``, are not given special treatment.
    """

    def __init__(self, input_path):
        """Remember the path of the compressed file to read.

        Args:
            input_path: Path of the compressed XML file.
        """
        self.input_path = input_path

    def extract_tag(self, line, start):
        """Return the tag name that begins at ``line[start]``.

        Args:
            line: Text being scanned.
            start: Index of the ``<`` that opens the tag.

        Returns:
            The characters between ``<`` and the first whitespace character
            or ``>`` (a closing tag keeps its leading ``/``), or ``-1`` when
            a ``?`` or ``!`` is met first (processing instructions, comments,
            declarations).
        """
        chars = []
        for char in line[start:]:
            # Any whitespace ends the name, so a tab or a line break after the
            # name does not leak into it.
            if char == '>' or char.isspace():
                break
            if char == '<':
                continue
            if char == '?' or char == '!':
                return -1
            chars.append(char)
        return "".join(chars)

    def _restore_line(self, line, open_tags):
        """Return ``line`` with every closing tag expanded to its full name.

        Args:
            line: One line of the compressed document.
            open_tags: Stack of currently open element names; opening tags
                are pushed and closing tags popped, in place.

        Raises:
            ValueError: If a closing tag is met while no element is open.
        """
        pieces = []
        copied_to = 0  # everything in line[:copied_to] is already in pieces
        for j, char in enumerate(line):
            # Skip ordinary text and any '<' inside a span already replaced.
            if char != '<' or j < copied_to:
                continue
            tag = self.extract_tag(line, j)
            if tag == -1 or not tag:
                # Declaration/comment, or a stray '<' without a name.
                continue
            end = line.find('>', j)
            if tag[0] != '/':
                self_closing = tag.endswith('/') or (
                    end != -1 and line[end - 1] == '/'
                )
                if not self_closing:
                    open_tags.append(tag)
                continue
            if not open_tags:
                raise ValueError(f"Missing opening tag for {tag[1:]}")
            if end == -1:
                # No '>' on this line: assume it directly follows the name.
                end = j + len(tag) + 1
            # The output is assembled from slices of the untouched input
            # line, so offsets stay valid however many tags are replaced.
            pieces.append(line[copied_to:j])
            pieces.append(f'</{open_tags.pop()}>')
            copied_to = end + 1
        pieces.append(line[copied_to:])
        return "".join(pieces)

    def decompress(self, output_path):
        """Write the expanded document to ``output_path``.

        Args:
            output_path: Path of the file to create or overwrite.

        Raises:
            ValueError: If a closing tag has no matching opening tag.
        """
        with open(self.input_path, 'r', encoding='utf-8') as file:
            lines = file.readlines()

        open_tags = []
        restored = []
        for line in lines:
            restored.append(self._restore_line(line, open_tags))

        # Write decompressed data to output file
        with open(output_path, 'w', encoding='utf-8') as file:
            file.writelines(restored)
30 | 47 |
|
# Usage: run this file as a script to expand the bundled sample. The guard
# keeps the example from running (and touching the filesystem) on import.
if __name__ == "__main__":
    decompressor = XMLDecompressor(r'../samples/compressed.xml')
    decompressor.decompress(r'../samples/decompressed.xml')
0 commit comments