Skip to content

Commit 6ddf038

Browse files
committed
Added support for RST and TXT files
Basically, for RST and TXT files we skip the comment parsing section, and simply pass each line of the files to the spell checker. Also, more comments were added to the code.
1 parent bdb9b43 commit 6ddf038

1 file changed

Lines changed: 63 additions & 9 deletions

File tree

codespell.py

Lines changed: 63 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from comment_parser import comment_parser
1515

1616

17+
# Split a camel case string into individual words.
1718
def splitCamelCase(word):
1819

1920
result = []
@@ -33,19 +34,45 @@ def splitCamelCase(word):
3334
return result
3435

3536

37+
# Map file suffix to file type.
3638
def getMimeType(filepath):
    """Map a file's suffix to the MIME type string used to select a parser.

    Args:
        filepath: Path (or bare file name) whose extension is inspected.

    Returns:
        The MIME type registered for the extension in the table below, or
        'text/plain' when the extension is missing or not in the table.
    """
    suffix2mime = {
        '.h': 'text/x-c++',
        '.cxx': 'text/x-c++',
        '.c': 'text/x-c++',
        '.py': 'text/x-python',
        '.ruby': 'text/x-ruby',
        '.java': 'text/x-java-source',
        '.txt': 'text/plain',
        '.rst': 'text/plain',
    }
    _, ext = os.path.splitext(filepath)

    # Unknown suffixes fall back to plain text rather than raising KeyError.
    return suffix2mime.get(ext, 'text/plain')
55+
56+
57+
#
58+
# For a regular text file, we don't need to parse it for comments. We
59+
# just pass every line to the spell checker.
60+
#
61+
def load_text_file(filename):
    """Read a plain text file and wrap each line as a Comment object.

    Each line is stripped of leading and trailing whitespace and paired with
    its 1-based line number, so the result can be handled like the output of
    the comment parser.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of comment_parser.common.Comment objects, one per line of the
        file, in file order.
    """
    with open(filename) as fp:
        return [
            comment_parser.common.Comment(raw.strip(), lineno)
            for lineno, raw in enumerate(fp, start=1)
        ]
4772

4873

74+
# The main spell checking procedure
75+
#
4976
def spell_check_file(filename, spell_checker, mimetype='',
5077
output_lvl=1, prefixes=[]):
5178

@@ -56,15 +83,22 @@ def spell_check_file(filename, spell_checker, mimetype='',
5683
print("spell_check_file:", filename, ",", mimetype)
5784

5885
# Returns a list of comment_parser.parsers.common.Comments
59-
try:
60-
clist = comment_parser.extract_comments(filename, mime=mimetype)
61-
except BaseException:
62-
print("Parser failed, skipping file\n")
63-
return []
86+
if mimetype == 'text/plain':
87+
clist = load_text_file(filename)
88+
else:
89+
try:
90+
clist = comment_parser.extract_comments(filename, mime=mimetype)
91+
except BaseException:
92+
print("Parser failed, skipping file\n")
93+
return []
6494

6595
bad_words = []
6696

6797
for c in clist:
98+
if output_lvl > 1:
99+
print("Comment: ", c)
100+
print(type(c))
101+
68102
mistakes = []
69103
spell_checker.set_text(c.text())
70104

@@ -139,6 +173,8 @@ def spell_check_file(filename, spell_checker, mimetype='',
139173
return bad_words
140174

141175

176+
# Does the file match any pattern in the exclude_list? Then exclude it.
177+
#
142178
def exclude_check(name, exclude_list):
143179
if exclude_list is None:
144180
return False
@@ -182,6 +218,8 @@ def parse_args():
182218
return args
183219

184220

221+
# Add the words from a dictionary file into our spell checking dictionary.
222+
#
185223
def add_dict(enchant_dict, filename):
186224
with open(filename) as f:
187225
lines = f.read().splitlines()
@@ -197,6 +235,8 @@ def main():
197235

198236
sitk_dict = Dict('en_US')
199237

238+
# Load the dictionary files
239+
#
200240
initial_dct = Path(__file__).parent / 'additional_dictionary.txt'
201241
if not initial_dct.exists():
202242
initial_dct = None
@@ -210,6 +250,7 @@ def main():
210250
spell_checker = SpellChecker(sitk_dict,
211251
filters=[EmailFilter, URLFilter])
212252

253+
# Set the amount of debugging messages to print.
213254
output_lvl = 1
214255
if args.brief:
215256
output_lvl = 0
@@ -233,11 +274,15 @@ def main():
233274
print("Prefixes:", prefixes)
234275
print("Suffixes:", args.suffix)
235276

277+
#
278+
# Spell check the files
279+
#
236280
for f in file_list:
237281

238282
if not args.miss:
239283
print("\nChecking", f)
240284

285+
# If f is a directory, recursively check for files in it.
241286
if os.path.isdir(f):
242287

243288
# f is a directory, so search for files inside
@@ -261,8 +306,10 @@ def main():
261306
output_lvl=output_lvl,
262307
prefixes=prefixes)
263308
bad_words = sorted(bad_words + result)
309+
264310
else:
265311

312+
# f is a file
266313
if exclude_check(f, args.exclude):
267314
print("\nExcluding", x)
268315
continue
@@ -273,21 +320,28 @@ def main():
273320

274321
bad_words = sorted(bad_words + result)
275322

323+
324+
# Done spell checking. Print out all the words not found in our dictionary.
325+
#
276326
if not args.miss:
277327
print("\nBad words\n")
278328

279329
prev = ""
330+
bc = 0
280331
for x in bad_words:
281332
if x[0] == prev:
282333
sys.stdout.write('.')
283334
continue
284335
print("\n", x[0], ": ", x[1], ", ", x[2], sep='')
285336
prev = x[0]
337+
bc = bc + 1
286338

287339
if not args.miss:
288340
print("")
289341

290-
print(len(bad_words), "misspellings found")
342+
print("")
343+
print(bc, "unknown words found")
344+
print(len(bad_words), "instances")
291345

292346
sys.exit(len(bad_words))
293347

0 commit comments

Comments
 (0)