From f45693e7f06a60eaf273698550e613e33eb8b212 Mon Sep 17 00:00:00 2001
From: TomV
Date: Thu, 7 Jul 2011 20:45:42 +0100
Subject: [PATCH] initial parsing with nltk

---
 parse.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 parse.py

diff --git a/parse.py b/parse.py
new file mode 100644
index 0000000..d44eb98
--- /dev/null
+++ b/parse.py
@@ -0,0 +1,27 @@
+import nltk
+
+
+def get_parts(text):
+    """Group the tokens of ``text`` by their part-of-speech tag.
+
+    ``text`` is split with ``nltk.word_tokenize`` and the tokens are tagged
+    with ``nltk.pos_tag``.
+
+    Args:
+        text: The string to tokenize and tag.
+
+    Returns:
+        A dict mapping each tag to the list of tokens that received it, in
+        the order the tagger returned them.
+    """
+    tokens = nltk.word_tokenize(text)
+    words_by_tag = {}
+    for word, tag in nltk.pos_tag(tokens):
+        # setdefault creates the list the first time a tag is seen.
+        words_by_tag.setdefault(tag, []).append(word)
+    return words_by_tag
+
+
+if __name__ == "__main__":
+    # Demo only; guarded so importing this module has no side effects.
+    print(get_parts("hi how are you"))