Skip to content

Commit f010dce

Browse files
authored
Merge pull request #16 from dave3d/HandleRSTFiles
Added support for RST and TXT files
2 parents bdb9b43 + 6ddf038 commit f010dce

1 file changed

Lines changed: 63 additions & 9 deletions

File tree

codespell.py

Lines changed: 63 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from comment_parser import comment_parser
1515

1616

17+
# Split a camel case string into individual words.
1718
def splitCamelCase(word):
1819

1920
result = []
@@ -33,19 +34,45 @@ def splitCamelCase(word):
3334
return result
3435

3536

37+
# Map file suffix to file type.
3638
def getMimeType(filepath):
    """Return the MIME type string that corresponds to a file's suffix.

    Only the final suffix of ``filepath`` (as split by ``os.path.splitext``)
    is considered, and it is matched case-insensitively so that ``FOO.PY``
    is classified the same way as ``foo.py``.

    Parameters:
        filepath: path or bare file name; the file does not need to exist.

    Returns:
        The MIME type string from the suffix table, or ``'text/plain'`` when
        the suffix is unknown or the name has no suffix at all.
    """
    suffix2mime = {
        '.h': 'text/x-c++',
        '.cxx': 'text/x-c++',
        '.c': 'text/x-c++',
        '.py': 'text/x-python',
        '.ruby': 'text/x-ruby',
        '.java': 'text/x-java-source',
        '.txt': 'text/plain',
        '.rst': 'text/plain',
    }
    ext = os.path.splitext(filepath)[1]
    # Unknown suffixes fall back to plain text rather than raising KeyError.
    return suffix2mime.get(ext.lower(), 'text/plain')
55+
56+
57+
#
58+
# For a regular text file, we don't need to parse it for comments. We
59+
# just pass every line to the spell checker
60+
#
61+
def load_text_file(filename):
    """Load a plain text file as a list of single-line comment objects.

    Each line of ``filename`` is stripped of leading and trailing whitespace
    and wrapped in a ``comment_parser.common.Comment`` together with its
    1-based line number, so the result can be processed the same way as the
    comments extracted from source files.

    Parameters:
        filename: path of the text file to read.

    Returns:
        A list with one ``Comment`` per line of the file, in file order.
        Blank lines are kept (as comments with empty text) so that line
        numbers stay aligned with the file.
    """
    with open(filename) as fp:
        return [
            comment_parser.common.Comment(line.strip(), lineno)
            for lineno, line in enumerate(fp, start=1)
        ]
4772

4873

74+
# The main spell checking procedure
75+
#
4976
def spell_check_file(filename, spell_checker, mimetype='',
5077
output_lvl=1, prefixes=[]):
5178

@@ -56,15 +83,22 @@ def spell_check_file(filename, spell_checker, mimetype='',
5683
print("spell_check_file:", filename, ",", mimetype)
5784

5885
# Returns a list of comment_parser.parsers.common.Comments
59-
try:
60-
clist = comment_parser.extract_comments(filename, mime=mimetype)
61-
except BaseException:
62-
print("Parser failed, skipping file\n")
63-
return []
86+
if mimetype == 'text/plain':
87+
clist = load_text_file(filename)
88+
else:
89+
try:
90+
clist = comment_parser.extract_comments(filename, mime=mimetype)
91+
except BaseException:
92+
print("Parser failed, skipping file\n")
93+
return []
6494

6595
bad_words = []
6696

6797
for c in clist:
98+
if output_lvl > 1:
99+
print("Comment: ", c)
100+
print(type(c))
101+
68102
mistakes = []
69103
spell_checker.set_text(c.text())
70104

@@ -139,6 +173,8 @@ def spell_check_file(filename, spell_checker, mimetype='',
139173
return bad_words
140174

141175

176+
# Does the file match any pattern in the exclude_list? Then exclude it.
177+
#
142178
def exclude_check(name, exclude_list):
143179
if exclude_list is None:
144180
return False
@@ -182,6 +218,8 @@ def parse_args():
182218
return args
183219

184220

221+
# Add the words from a dictionary file into our spell checking dictionary.
222+
#
185223
def add_dict(enchant_dict, filename):
186224
with open(filename) as f:
187225
lines = f.read().splitlines()
@@ -197,6 +235,8 @@ def main():
197235

198236
sitk_dict = Dict('en_US')
199237

238+
# Load the dictionary files
239+
#
200240
initial_dct = Path(__file__).parent / 'additional_dictionary.txt'
201241
if not initial_dct.exists():
202242
initial_dct = None
@@ -210,6 +250,7 @@ def main():
210250
spell_checker = SpellChecker(sitk_dict,
211251
filters=[EmailFilter, URLFilter])
212252

253+
# Set the amount of debugging messages to print.
213254
output_lvl = 1
214255
if args.brief:
215256
output_lvl = 0
@@ -233,11 +274,15 @@ def main():
233274
print("Prefixes:", prefixes)
234275
print("Suffixes:", args.suffix)
235276

277+
#
278+
# Spell check the files
279+
#
236280
for f in file_list:
237281

238282
if not args.miss:
239283
print("\nChecking", f)
240284

285+
# If f is a directory, recursively check for files in it.
241286
if os.path.isdir(f):
242287

243288
# f is a directory, so search for files inside
@@ -261,8 +306,10 @@ def main():
261306
output_lvl=output_lvl,
262307
prefixes=prefixes)
263308
bad_words = sorted(bad_words + result)
309+
264310
else:
265311

312+
# f is a file
266313
if exclude_check(f, args.exclude):
267314
print("\nExcluding", x)
268315
continue
@@ -273,21 +320,28 @@ def main():
273320

274321
bad_words = sorted(bad_words + result)
275322

323+
324+
# Done spell checking. Print out all the words not found in our dictionary.
325+
#
276326
if not args.miss:
277327
print("\nBad words\n")
278328

279329
prev = ""
330+
bc = 0
280331
for x in bad_words:
281332
if x[0] == prev:
282333
sys.stdout.write('.')
283334
continue
284335
print("\n", x[0], ": ", x[1], ", ", x[2], sep='')
285336
prev = x[0]
337+
bc = bc + 1
286338

287339
if not args.miss:
288340
print("")
289341

290-
print(len(bad_words), "misspellings found")
342+
print("")
343+
print(bc, "unknown words found")
344+
print(len(bad_words), "instances")
291345

292346
sys.exit(len(bad_words))
293347

0 commit comments

Comments
 (0)