-
Notifications
You must be signed in to change notification settings - Fork 844
/
Copy pathreadability.py
56 lines (38 loc) · 1.33 KB
/
readability.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# -*- coding: utf-8 -*-
# Adapted from here: http://acdx.net/calculating-the-flesch-kincaid-level-in-python/
# See here for details: http://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_test
from __future__ import division
import re
def mean(seq):
    """Return the arithmetic mean of the values in ``seq``.

    ``seq`` must support both ``sum()`` and ``len()``.  An empty sequence
    raises ``ZeroDivisionError`` because the total is divided by a length
    of zero.
    """
    total = sum(seq)
    count = len(seq)
    return total / count
def syllables(word):
    """Estimate the number of syllables in a single word.

    Heuristic: words of three characters or fewer count as one syllable.
    For longer words a trailing "es", "ed", or "e" (unless that "e"
    follows an "l", as in "table") is dropped as silent, and each run of
    consecutive vowels (a, e, i, o, u, y) in what remains counts as one
    syllable.

    The word is lower-cased first so that capitalised or upper-case words
    are counted the same as their lower-case form, and the result is never
    less than 1, so a vowel-less token such as "shhh" still counts as one
    syllable.

    :param word: a single word (letters only, no surrounding whitespace).
    :return: the estimated syllable count, always >= 1.
    """
    if len(word) <= 3:
        return 1
    # The patterns below only know lower-case letters.
    word = word.lower()
    stem = re.sub(r"(es|ed|(?<!l)e)$", "", word)
    vowel_groups = re.findall(r"[aeiouy]+", stem)
    return max(1, len(vowel_groups))
def normalize(text):
    """Reduce ``text`` to ASCII letters, single spaces and ". " separators.

    Three passes are applied in order:

    1. Every character that is not a sentence terminator (``.!?:;``),
       whitespace, or an ASCII letter is removed.
    2. Each run of terminators, together with any whitespace around it,
       is replaced by the canonical separator ``". "``.
    3. Every remaining run of whitespace is collapsed to one space.

    The result is not stripped, so it may start or end with a single
    space and ends with ``". "`` when the text ends in a terminator.
    """
    terminators = ".!?:;"
    term = re.escape(terminators)

    junk = re.compile(r"[^%s\sA-Za-z]+" % term)
    sentence_end = re.compile(r"\s*([%s]+\s*)+" % term)
    whitespace = re.compile(r"\s+")

    letters_only = junk.sub("", text)
    separated = sentence_end.sub(". ", letters_only)
    return whitespace.sub(" ", separated)
def text_stats(text, wc=None):
    """Collect the counts needed by the readability formulas.

    The text is normalized, split into sentences on ". ", and each
    sentence is split into words on single spaces.  Sentences with fewer
    than two words are ignored.

    :param text: the raw text to analyse.
    :param wc: optional externally supplied word count.  When truthy it is
        returned as the word count instead of the number of words found
        here; the sentence and syllable counts are still computed from
        ``text``.  Defaults to ``None`` (count the words found in ``text``).
    :return: a ``(sentences, words, syllables)`` tuple.
    """
    text = normalize(text)

    sentences = []
    for chunk in text.split(". "):
        # normalize() can leave a single leading/trailing space, which
        # makes split(" ") yield empty strings.  Drop them so they are not
        # counted as words (and as one syllable each), and so a one-word
        # sentence cannot slip past the two-word minimum.
        tokens = [w for w in chunk.split(" ") if w]
        if len(tokens) >= 2:
            sentences.append(tokens)

    if wc:
        words = wc
    else:
        words = sum(len(s) for s in sentences)
    sbls = sum(syllables(w) for s in sentences for w in s)
    return len(sentences), words, sbls
def flesch_index(stats):
    """Compute the Flesch reading-ease score.

    :param stats: a ``(sentences, words, syllables)`` triple, in the order
        returned by ``text_stats``.
    :return: ``206.835 - 1.015 * words/sentences - 84.6 * syllables/words``,
        or ``0`` when the sentence count or the word count is zero.
    """
    sentence_count, word_count, syllable_count = stats
    if sentence_count != 0 and word_count != 0:
        words_per_sentence = word_count / sentence_count
        syllables_per_word = syllable_count / word_count
        return 206.835 - 1.015 * words_per_sentence - 84.6 * syllables_per_word
    # No sentences or no words: the ratios are undefined.
    return 0
def flesch_kincaid_level(stats):
    """Compute the Flesch-Kincaid grade level.

    :param stats: a ``(sentences, words, syllables)`` triple, in the order
        returned by ``text_stats``.
    :return: ``0.39 * words/sentences + 11.8 * syllables/words - 15.59``,
        or ``0`` when the sentence count or the word count is zero.
    """
    sentence_count, word_count, syllable_count = stats
    if sentence_count != 0 and word_count != 0:
        words_per_sentence = word_count / sentence_count
        syllables_per_word = syllable_count / word_count
        return 0.39 * words_per_sentence + 11.8 * syllables_per_word - 15.59
    # No sentences or no words: the ratios are undefined.
    return 0