From 25552a5253a5b49bdb215bb19d857bb522dfcd42 Mon Sep 17 00:00:00 2001 From: Samarjit Debnath Date: Sun, 8 Sep 2024 01:18:20 +0530 Subject: [PATCH 1/2] added new exercise --- 7_pos/pos_exercise.md | 1 + 1 file changed, 1 insertion(+) diff --git a/7_pos/pos_exercise.md b/7_pos/pos_exercise.md index e70a362..c215b9a 100644 --- a/7_pos/pos_exercise.md +++ b/7_pos/pos_exercise.md @@ -4,6 +4,7 @@ Exercise for Spacy POS tutorial, 1) Extract all NOUN tokens from this story. You will have to read the file in python first to collect all the text and then extract NOUNs in a python list 2) Extract all numbers (NUM POS type) in a python list 3) Print a count of all POS tags in this story + 4) Print all the root/base (consider lemma) forms of verbs (VERB POS type) in a python list. [Solution](https://github.com/codebasics/nlp-tutorials/blob/main/7_pos/Exercise/pos_exercise_solution.ipynb) From b0e7fc0dd2dd0fc72243c9ae293acc2b8d40c3ec Mon Sep 17 00:00:00 2001 From: Samarjit Debnath Date: Sun, 8 Sep 2024 01:37:10 +0530 Subject: [PATCH 2/2] added solution for pos exercise (4) --- 7_pos/Exercise/pos_exercise_solution.ipynb | 61 ++++++++++++++-------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/7_pos/Exercise/pos_exercise_solution.ipynb b/7_pos/Exercise/pos_exercise_solution.ipynb index c89c956..bc740c3 100644 --- a/7_pos/Exercise/pos_exercise_solution.ipynb +++ b/7_pos/Exercise/pos_exercise_solution.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "scrolled": true }, @@ -27,16 +27,16 @@ { "data": { "text/plain": [ - "'Inflation rose again in April, continuing a climb that has pushed consumers to the brink and is threatening the economic expansion, the Bureau of Labor Statistics reported Wednesday.\\n\\nThe consumer price index, a broad-based measure of prices for goods and services, increased 8.3% from a year ago, higher than the Dow Jones estimate for an 8.1% gain. That represented a slight ease from March’s peak but was still close to the highest level since the summer of 1982.\\n\\nRemoving volatile food and ene'" + "'Inflation rose again in April, continuing a climb that has pushed consumers to the brink and is threatening the economic expansion, the Bureau of Labor Statistics reported Wednesday.\\n\\nThe consumer price index, a broad-based measure of prices for goods and services, increased 8.3% from a year ago, higher than the Dow Jones estimate for an 8.1% gain. That represented a slight ease from March’s peak but was still close to the highest level since the summer of 1982.\\n\\nRemoving volatile food and energ'" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "with open(\"news_story.txt\",\"r\") as f:\n", + "with open(\"../news_story.txt\",\"r\") as f:\n", " news_text = f.read()\n", " \n", "news_text[:500]" @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -69,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 4, "metadata": { "scrolled": true }, @@ -80,7 +80,7 @@ "[8.3, 8.1, 1982, 6.2, 6, 0.3, 0.2, 0.6, 0.4, 0.1]" ] }, - "execution_count": 10, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -91,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 5, "metadata": { "scrolled": true }, @@ -111,7 +111,7 @@ " prices]" ] }, - "execution_count": 17, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -129,18 +129,18 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{92: 96,\n", - " 100: 29,\n", + " 100: 27,\n", " 86: 15,\n", " 85: 39,\n", - " 96: 17,\n", - " 97: 33,\n", + " 96: 16,\n", + " 97: 32,\n", " 90: 34,\n", " 95: 4,\n", " 87: 13,\n", @@ -153,7 +153,7 @@ " 101: 1}" ] }, - "execution_count": 12, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -165,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -173,11 +173,11 @@ "output_type": "stream", "text": [ "NOUN | 96\n", - "VERB | 29\n", + "VERB | 27\n", "ADV | 15\n", "ADP | 39\n", - "PROPN | 17\n", - "PUNCT | 33\n", + "PROPN | 16\n", + "PUNCT | 32\n", "DET | 34\n", "PRON | 4\n", "AUX | 13\n", @@ -195,11 +195,30 @@ "for k,v in count.items():\n", " print(doc.vocab[k].text, \"|\",v)" ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['rise', 'continue', 'push', 'threaten', 'report', 'base', 'increase', 'represent', 'remove', 'call', 'rise', 'cloud', 'peak', 'mean', 'continue', 'lose', 'adjust', 'decrease', 'drop', 'begin', 'see', 'rise', 'spread', 'respond', 'come', 'show', 'have']\n" + ] + } + ], + "source": [ + "# Print all the root/base (consider lemma) forms of verbs (VERB POS type) in a python list.\n", + "lemmVerbTokens = [token.lemma_ for token in doc if token.pos_ == 'VERB']\n", + "print(lemmVerbTokens)" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -213,7 +232,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.11.1" } }, "nbformat": 4,