From d500ff7aa3d52710cbdee199ddda4c39f9975b5b Mon Sep 17 00:00:00 2001 From: Saima Date: Mon, 11 Oct 2021 10:24:20 +0530 Subject: [PATCH 01/17] Add new file: Poissondistribution.py; Implement mean, stdev, pdf, cdf, plot and __repr__ --- probdists/Poissondistribution.py | 81 ++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 probdists/Poissondistribution.py diff --git a/probdists/Poissondistribution.py b/probdists/Poissondistribution.py new file mode 100644 index 0000000..8fb045a --- /dev/null +++ b/probdists/Poissondistribution.py @@ -0,0 +1,81 @@ +import math +import matplotlib.pyplot as plt +from .Generaldistribution import Distribution + +class Poisson(Distribution): + """ + """ + + def __init__(self, lmbda): + + self.lmbda = lmbda + + Distribution.__init__(self, self.calculate_mean() + self.calculate_stdev()) + + def calculate_mean(self, round_to=2): + """ + """ + self.mean = math.sqrt(self.lmbda) + + return round(self.mean, round_to) + + def calculate_stdev(self, round_to=2): + """ + """ + self.stdev = math.sqrt(self.lmbda) + + return round(self.stdev, round_to) + + def calculate_pdf(self, x, round_to=2): + """ + """ + + fact = math.factorial(x) + self.pdf = ( math.exp(-self.lmbda) * self.lmbda ** x) / fact + return round(self.pdf, round_to) + + def calculate_cdf(self, x, round_to=2): + """ + """ + value = 0 + for i in range(x): + value += _calc_discrete_pdf(i) + return round(value, round_to) + + def _calc_discrete_pdf(x): + """ + """ + fact = math.factorial(x) + pdf = ( math.exp(-self.lmbda) * self.lmbda ** x) / fact + return round(pdf, round_to) + + def plot_bar_pdf(self, points=100): + """ + """ + + x = [] + y = [] + + # calculate the x values to visualize + for i in range(points + 1): + x.append(i) + y.append(self._calc_discrete_pdf(i)) + + # make the plots + plt.bar(x, y) + plt.title("Probability Mass Plt for Poisson Distribution") + plt.ylabel("Probability") + plt.xlabel("x") + + plt.show() + + return x, y + + + def __repr__(self): + """ + """ + + return "mean {0}, standard deviation {1}, lambda {2}".format(self.mean, self.stdev, self.lmbda) + From ff16c14755e18faba8eebcc24cc6af8ad34b0a0d Mon Sep 17 00:00:00 2001 From: Saima Date: Mon, 11 Oct 2021 10:41:03 +0530 Subject: [PATCH 02/17] Add pydocs: Poisson Distribution --- probdists/Exponentialdistribution.py | 2 +- probdists/Poissondistribution.py | 75 +++++++++++++++++++++++----- 2 files changed, 64 insertions(+), 13 deletions(-) diff --git a/probdists/Exponentialdistribution.py b/probdists/Exponentialdistribution.py index 8eda38f..a9cf3f8 100644 --- a/probdists/Exponentialdistribution.py +++ b/probdists/Exponentialdistribution.py @@ -124,7 +124,7 @@ def plot_bar_pdf(self, points=100): # def __repr__(self): - """ Method to outputthe characteristics of the Exponential instace. + """ Method to output the characteristics of the Exponential instace. Args: None Returns: diff --git a/probdists/Poissondistribution.py b/probdists/Poissondistribution.py index 8fb045a..758b4f1 100644 --- a/probdists/Poissondistribution.py +++ b/probdists/Poissondistribution.py @@ -3,7 +3,18 @@ from .Generaldistribution import Distribution class Poisson(Distribution): - """ + """ Poisson distribution class for calculating and + visualizing a Poisson distribution. + + Attributes: + + mean (float): the mean value of the distribution + stdev (float): the standard deviation of the distribution + + data (list of floats): extracted from the data file + + lmbda (float): rate of the poisson distribution (missing an 'a' to prevent name clash with Python keyword) + """ def __init__(self, lmbda): @@ -14,29 +25,53 @@ def __init__(self, lmbda): self.calculate_stdev()) def calculate_mean(self, round_to=2): - """ + """ Method to calculate the mean from lambda + + Args: + round_to (int): Round the mean value. [Default value: 2 floating point] + + Returns: + float: mean of the distribution """ self.mean = math.sqrt(self.lmbda) return round(self.mean, round_to) def calculate_stdev(self, round_to=2): - """ + """ Method to calculate the standard deviation from lmbda + + Args: + round_to (int): Round the mean value. [Default value: 2 floating point] + + Returns: + float: standard deviation of the distribution """ self.stdev = math.sqrt(self.lmbda) return round(self.stdev, round_to) def calculate_pdf(self, x, round_to=2): - """ + """ Probability density function calculator for the Poisson distribution. + + Args: + x (float): point for caluclating the probability density function + round_to (int): Round the mean value. [Default value: 2 floating point] + + Returns: + float: probability density function """ - fact = math.factorial(x) - self.pdf = ( math.exp(-self.lmbda) * self.lmbda ** x) / fact + self.pdf = _calc_discrete_pdf(x) return round(self.pdf, round_to) def calculate_cdf(self, x, round_to=2): - """ + """ Probability density function calculator for the Poisson distribution. + Args: + x (float): point for calculating the probability density function + round_to (int): Round the mean value. [Default value: 2 floating point] + + Returns: + float: probability density function output """ value = 0 for i in range(x): @@ -44,14 +79,26 @@ def calculate_cdf(self, x, round_to=2): return round(value, round_to) def _calc_discrete_pdf(x): - """ + """ Internal function to calculate probability density function at a point. + Should not be used by end user. + + Args: + x (int): point for calculating the mean value. """ fact = math.factorial(x) pdf = ( math.exp(-self.lmbda) * self.lmbda ** x) / fact - return round(pdf, round_to) + return pdf def plot_bar_pdf(self, points=100): - """ + """ Method to plot the pdf of the Poisson distribution. + + Args: + points (int): number of discrete data points + + Returns: + list: x values for the pdf plot + list: y values for the pdf plot + """ x = [] @@ -69,12 +116,16 @@ def plot_bar_pdf(self, points=100): plt.xlabel("x") plt.show() - + return x, y def __repr__(self): - """ + """ Method to output the characteristics of the Poisson instace. + Args: + None + Returns: + string: characteristics of the Poisson """ return "mean {0}, standard deviation {1}, lambda {2}".format(self.mean, self.stdev, self.lmbda) From fdcba26d79aa1cf7367a7a64cd1bbb0fc13f6c83 Mon Sep 17 00:00:00 2001 From: Saima Date: Mon, 11 Oct 2021 11:02:04 +0530 Subject: [PATCH 03/17] Add tests for Poisson distribution class --- probdists/numbers_poisson.txt | 100 ++++++++++++++++++++++++++++++++++ test.py | 50 +++++++++++++++++ 2 files changed, 150 insertions(+) create mode 100644 probdists/numbers_poisson.txt diff --git a/probdists/numbers_poisson.txt b/probdists/numbers_poisson.txt new file mode 100644 index 0000000..6a0bcb8 --- /dev/null +++ b/probdists/numbers_poisson.txt @@ -0,0 +1,100 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 \ No newline at end of file diff --git a/test.py b/test.py index 240b84d..a471bcf 100644 --- a/test.py +++ b/test.py @@ -8,6 +8,7 @@ from probdists import Bernoulli from probdists import Uniform from probdists import Triangular, TriangularValueException +from probdists import Poisson class TestGeneraldistribution(unittest.TestCase): @@ -186,6 +187,55 @@ def test_cdf(self): self.assertEqual(self.exponential.calculate_cdf(9.5, 4), 0.907, \ 'calculate_cdf does not return expected result after calculating mean and stdev') +class TestPoissonClass(unittest.TestCase): + def setUp(self): + self.poisson = Poisson(50) + self.poisson.read_data_file('probdists/numbers_poisson.txt') + + def test_initialization(self): + self.assertEqual(self.poisson.mean, 7.07, 'incorrect mean') + self.assertEqual(self.poisson.stdev, 7.07, + 'incoorect standard deviation') + + def test_readdata(self): + self.assertEqual(self.exponential.data, + [i for i in range(1,101)], + 'data read incorrectly') + + def test_meancalculation(self): + self.poisson.calculate_mean() + self.assertEqual(self.poisson.mean, + 7.07, + 'calculated mean not as expected') + + def test_stdevcalculation(self): + self.poisson.calculate_stdev() + self.assertEqual(self.poisson.stdev, + 7.07, + 'calculated standard deviation incorrect') + + def test_pdf(self): + self.assertEqual(self.poisson.calculate_pdf(50, 5), 0.05632, + 'calculate_pdf function does not give expected result') + self.poisson.calculate_mean() + self.poisson.calculate_stdev() + self.assertEqual(self.poisson.calculate_pdf(75, 5), 0.00021, + 'calculate_pdf function after calculating mean and \ + stdev does not give expected result') + + def test_cdf(self): + self.assertEqual(self.poisson.calculate_cdf(25), 0.0, 'calculate_cdf does not return expected result') + self.assertEqual(self.poisson.calculate_cdf(50, 5), 0.48119, 'calculate_cdf does not return expected result') + + self.poisson.calculate_mean() + self.poisson.calculate_stdev() + + self.assertEqual(self.poisson.calculate_cdf(60), 0.91, \ + 'calculate_cdf does not return expected result after calculating mean and stdev') + self.assertEqual(self.poisson.calculate_cdf(75, 4), 0.9994, \ + 'calculate_cdf does not return expected result after calculating mean and stdev') + + class TestUniformClass(unittest.TestCase): def setUp(self): self.uniform = Uniform(0,10) From ea9664d5c682aa45f999da88287b05f104b4a5fa Mon Sep 17 00:00:00 2001 From: Saima Date: Mon, 11 Oct 2021 11:05:08 +0530 Subject: [PATCH 04/17] Update data map for Poisson --- probdists/Generaldistribution.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/probdists/Generaldistribution.py b/probdists/Generaldistribution.py index 4beac8b..accce45 100644 --- a/probdists/Generaldistribution.py +++ b/probdists/Generaldistribution.py @@ -58,7 +58,8 @@ def read_data_file(self, file_name, separator='\\n', header=None): 'demo_gamma_data': 'numbers_gamma.txt', 'demo_uniform_data': 'numbers_uniform.txt', 'demo_bernoulli_data': 'numbers_bernoulli.txt', - 'demo_triangular_data': 'numbers_triangular.txt' + 'demo_triangular_data': 'numbers_triangular.txt', + 'demo_poisson_data': 'numbers_poisson.txt' } if file_name in file_name_map: dirname = Path(__file__).parent.parent.absolute() From 25f2fadb9f89d9399e258d5b29dbc0665de4d358 Mon Sep 17 00:00:00 2001 From: Saima Date: Mon, 11 Oct 2021 11:17:23 +0530 Subject: [PATCH 05/17] Missed a comma --- probdists/Poissondistribution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/probdists/Poissondistribution.py b/probdists/Poissondistribution.py index 758b4f1..dec336d 100644 --- a/probdists/Poissondistribution.py +++ b/probdists/Poissondistribution.py @@ -21,7 +21,7 @@ def __init__(self, lmbda): self.lmbda = lmbda - Distribution.__init__(self, self.calculate_mean() + Distribution.__init__(self, self.calculate_mean(), self.calculate_stdev()) def calculate_mean(self, round_to=2): From 15f7561b75d157f04a8ac0d2b942747f5a03da14 Mon Sep 17 00:00:00 2001 From: Saima Date: Mon, 11 Oct 2021 11:19:14 +0530 Subject: [PATCH 06/17] Remove trailing newlines --- probdists/Poissondistribution.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/probdists/Poissondistribution.py b/probdists/Poissondistribution.py index dec336d..4338567 100644 --- a/probdists/Poissondistribution.py +++ b/probdists/Poissondistribution.py @@ -14,9 +14,8 @@ class Poisson(Distribution): data (list of floats): extracted from the data file lmbda (float): rate of the poisson distribution (missing an 'a' to prevent name clash with Python keyword) - + """ - def __init__(self, lmbda): self.lmbda = lmbda @@ -128,5 +127,4 @@ def __repr__(self): string: characteristics of the Poisson """ - return "mean {0}, standard deviation {1}, lambda {2}".format(self.mean, self.stdev, self.lmbda) - + return "mean {0}, standard deviation {1}, lambda {2}".format(self.mean, self.stdev, self.lmbda) \ No newline at end of file From d7c9918cf5114056b365d17611d559dceedb1f7d Mon Sep 17 00:00:00 2001 From: Saima Date: Wed, 13 Oct 2021 08:48:40 +0530 Subject: [PATCH 07/17] Lint: Poissondistribution.py and test.py with flake8 --- .vscode/settings.json | 4 +++ probdists/Poissondistribution.py | 50 ++++++++++++++++++-------------- test.py | 36 ++++++++++++----------- 3 files changed, 51 insertions(+), 39 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..33fe63f --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,4 @@ +{ + "python.linting.flake8Enabled": true, + "python.linting.enabled": true +} \ No newline at end of file diff --git a/probdists/Poissondistribution.py b/probdists/Poissondistribution.py index 4338567..0c76958 100644 --- a/probdists/Poissondistribution.py +++ b/probdists/Poissondistribution.py @@ -2,8 +2,9 @@ import matplotlib.pyplot as plt from .Generaldistribution import Distribution + class Poisson(Distribution): - """ Poisson distribution class for calculating and + """ Poisson distribution class for calculating and visualizing a Poisson distribution. Attributes: @@ -13,21 +14,23 @@ class Poisson(Distribution): data (list of floats): extracted from the data file - lmbda (float): rate of the poisson distribution (missing an 'a' to prevent name clash with Python keyword) - + lmbda (float): rate of the poisson distribution + (missing an 'a' to prevent name clash with Python keyword) """ def __init__(self, lmbda): self.lmbda = lmbda - Distribution.__init__(self, self.calculate_mean(), - self.calculate_stdev()) + Distribution.__init__(self, + self.calculate_mean(), + self.calculate_stdev()) def calculate_mean(self, round_to=2): """ Method to calculate the mean from lambda Args: - round_to (int): Round the mean value. [Default value: 2 floating point] + round_to (int): Round the mean value. + [Default value: 2 floating point] Returns: float: mean of the distribution @@ -35,12 +38,13 @@ def calculate_mean(self, round_to=2): self.mean = math.sqrt(self.lmbda) return round(self.mean, round_to) - + def calculate_stdev(self, round_to=2): """ Method to calculate the standard deviation from lmbda Args: - round_to (int): Round the mean value. [Default value: 2 floating point] + round_to (int): Round the mean value. + [Default value: 2 floating point] Returns: float: standard deviation of the distribution @@ -48,46 +52,48 @@ def calculate_stdev(self, round_to=2): self.stdev = math.sqrt(self.lmbda) return round(self.stdev, round_to) - + def calculate_pdf(self, x, round_to=2): """ Probability density function calculator for the Poisson distribution. Args: x (float): point for caluclating the probability density function - round_to (int): Round the mean value. [Default value: 2 floating point] + round_to (int): Round the mean value. + [Default value: 2 floating point] Returns: float: probability density function """ - self.pdf = _calc_discrete_pdf(x) + self.pdf = self._calc_discrete_pdf(x) return round(self.pdf, round_to) - + def calculate_cdf(self, x, round_to=2): """ Probability density function calculator for the Poisson distribution. Args: x (float): point for calculating the probability density function - round_to (int): Round the mean value. [Default value: 2 floating point] + round_to (int): Round the mean value. + [Default value: 2 floating point] Returns: float: probability density function output """ value = 0 for i in range(x): - value += _calc_discrete_pdf(i) + value += self._calc_discrete_pdf(i) return round(value, round_to) - - def _calc_discrete_pdf(x): + + def _calc_discrete_pdf(self, x): """ Internal function to calculate probability density function at a point. Should not be used by end user. - + Args: x (int): point for calculating the mean value. """ fact = math.factorial(x) - pdf = ( math.exp(-self.lmbda) * self.lmbda ** x) / fact + pdf = (math.exp(-self.lmbda) * self.lmbda ** x) / fact return pdf - + def plot_bar_pdf(self, points=100): """ Method to plot the pdf of the Poisson distribution. @@ -107,7 +113,7 @@ def plot_bar_pdf(self, points=100): for i in range(points + 1): x.append(i) y.append(self._calc_discrete_pdf(i)) - + # make the plots plt.bar(x, y) plt.title("Probability Mass Plt for Poisson Distribution") @@ -118,7 +124,6 @@ def plot_bar_pdf(self, points=100): return x, y - def __repr__(self): """ Method to output the characteristics of the Poisson instace. Args: @@ -127,4 +132,5 @@ def __repr__(self): string: characteristics of the Poisson """ - return "mean {0}, standard deviation {1}, lambda {2}".format(self.mean, self.stdev, self.lmbda) \ No newline at end of file + return "mean {0}, standard deviation {1}, lambda {2}".format(self.mean, + self.stdev, self.lmbda) diff --git a/test.py b/test.py index a471bcf..f935169 100644 --- a/test.py +++ b/test.py @@ -50,8 +50,7 @@ def test_readdata(self): def test_meancalculation(self): self.gaussian.calculate_mean() self.assertEqual(self.gaussian.mean, - sum(self.gaussian.data) / - float(len(self.gaussian.data)), + sum(self.gaussian.data) / float(len(self.gaussian.data)), 'calculated mean not as expected') def test_stdevcalculation(self): @@ -182,10 +181,13 @@ def test_cdf(self): self.exponential.calculate_mean() self.exponential.calculate_stdev() - self.assertEqual(self.exponential.calculate_cdf(-1.3), 0, \ - 'calculate_cdf does not return expected result after calculating mean and stdev') - self.assertEqual(self.exponential.calculate_cdf(9.5, 4), 0.907, \ - 'calculate_cdf does not return expected result after calculating mean and stdev') + self.assertEqual(self.exponential.calculate_cdf(-1.3), + 0, + 'calculate_cdf does not return expected result after calculating mean and stdev') + self.assertEqual(self.exponential.calculate_cdf(9.5, 4), + 0.907, + 'calculate_cdf does not return expected result after calculating mean and stdev') + class TestPoissonClass(unittest.TestCase): def setUp(self): @@ -199,7 +201,7 @@ def test_initialization(self): def test_readdata(self): self.assertEqual(self.exponential.data, - [i for i in range(1,101)], + [i for i in range(1, 101)], 'data read incorrectly') def test_meancalculation(self): @@ -230,15 +232,17 @@ def test_cdf(self): self.poisson.calculate_mean() self.poisson.calculate_stdev() - self.assertEqual(self.poisson.calculate_cdf(60), 0.91, \ - 'calculate_cdf does not return expected result after calculating mean and stdev') - self.assertEqual(self.poisson.calculate_cdf(75, 4), 0.9994, \ - 'calculate_cdf does not return expected result after calculating mean and stdev') + self.assertEqual(self.poisson.calculate_cdf(60), + 0.91, + 'calculate_cdf does not return expected result after calculating mean and stdev') + self.assertEqual(self.poisson.calculate_cdf(75, 4), + 0.9994, + 'calculate_cdf does not return expected result after calculating mean and stdev') class TestUniformClass(unittest.TestCase): def setUp(self): - self.uniform = Uniform(0,10) + self.uniform = Uniform(0, 10) self.uniform.read_data_file('probdists/numbers_uniform.txt') def test_initialization(self): @@ -258,11 +262,10 @@ def test_replace_stats_with_data(self): self.assertEqual(l, 1) self.assertEqual(h, 5) - def test_meancalculation(self): self.uniform.calculate_mean() self.assertEqual(self.uniform.mean, - 5, + 5, 'calculated mean not as expected') def test_stdevcalculation(self): @@ -288,7 +291,6 @@ def test_cdf(self): self.assertEqual(self.uniform.calculate_cdf(4), 0.75, 'calculate_cdf function does not give expected result') - class TestGammaClass(unittest.TestCase): def setUp(self): self.gamma = Gamma() @@ -326,9 +328,9 @@ def test_pdf(self): 'calculate_pdf function does not give expected result') def test_cdf(self): - self.assertEqual(self.gamma.calculate_cdf(4, False, 5), 1 - round(3/math.exp(2), 5), + self.assertEqual(self.gamma.calculate_cdf(4, False, 5), 1 - round(3 / math.exp(2), 5), 'cdf function does not give expected result') - self.assertEqual(self.gamma.calculate_cdf(4), round(3/math.exp(2), 2), + self.assertEqual(self.gamma.calculate_cdf(4), round(3 / math.exp(2), 2), 'cdf function does not give expected result') def test_add(self): From 868bca826c6c96af510bf6a7578b89e116b55644 Mon Sep 17 00:00:00 2001 From: Saima Date: Wed, 13 Oct 2021 08:51:23 +0530 Subject: [PATCH 08/17] Commit flake8 vscode settings: ignore E501 --- .gitignore | 1 + .vscode/settings.json | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 12d6303..1ff859b 100644 --- a/.gitignore +++ b/.gitignore @@ -143,3 +143,4 @@ cython_debug/ *~ *.swp *.swo + diff --git a/.vscode/settings.json b/.vscode/settings.json index 33fe63f..b07ea65 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,5 @@ { "python.linting.flake8Enabled": true, - "python.linting.enabled": true + "python.linting.enabled": true, + "python.linting.flake8Args": ["--ignore=E501"] } \ No newline at end of file From fa4b64104fb2c22bbd8b7846d2e90663fb36f5d4 Mon Sep 17 00:00:00 2001 From: Saima Date: Sat, 16 Oct 2021 10:00:41 +0530 Subject: [PATCH 09/17] Add Poisson to __init__.py --- .vscode/settings.json | 2 +- probdists/Poissondistribution.py | 1 + probdists/__init__.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index b07ea65..e8a555a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,5 @@ { "python.linting.flake8Enabled": true, "python.linting.enabled": true, - "python.linting.flake8Args": ["--ignore=E501"] + "python.linting.flake8Args": ["--ignore=E501, F401"] } \ No newline at end of file diff --git a/probdists/Poissondistribution.py b/probdists/Poissondistribution.py index 0c76958..65c5442 100644 --- a/probdists/Poissondistribution.py +++ b/probdists/Poissondistribution.py @@ -16,6 +16,7 @@ class Poisson(Distribution): lmbda (float): rate of the poisson distribution (missing an 'a' to prevent name clash with Python keyword) + """ def __init__(self, lmbda): diff --git a/probdists/__init__.py b/probdists/__init__.py index 664a26b..ba18515 100644 --- a/probdists/__init__.py +++ b/probdists/__init__.py @@ -6,3 +6,4 @@ from .Bernoullidistribution import Bernoulli from .Uniformdistribution import Uniform from .Triangulardistribution import Triangular, TriangularValueException +from .Poissondistribution import Poisson From 7586e5bd02a80e9aebaa1628fbf1254430412b62 Mon Sep 17 00:00:00 2001 From: Saima Date: Sat, 16 Oct 2021 11:10:38 +0530 Subject: [PATCH 10/17] Implement: Batesdistribution.py --- probdists/Batesdistribution.py | 71 ++++++++++++++++++++++++++++++++ probdists/Generaldistribution.py | 7 ++-- probdists/Poissondistribution.py | 2 +- probdists/__init__.py | 1 + probdists/numbers_bates.py | 0 5 files changed, 77 insertions(+), 4 deletions(-) create mode 100644 probdists/Batesdistribution.py create mode 100644 probdists/numbers_bates.py diff --git a/probdists/Batesdistribution.py b/probdists/Batesdistribution.py new file mode 100644 index 0000000..d5dbd1a --- /dev/null +++ b/probdists/Batesdistribution.py @@ -0,0 +1,71 @@ +import math +import numpy as np +from matplotlib import pyplot as plt +from .Generaldistribution import Distribution + + +class Bates(Distribution): + + def __init__(self, n, a=0, b=1): + + self.n = n + self.a = a + self.b = b + Distribution.__init__(self, + self.calculate_mean(), + self.calculate_stdev()) + + def calculate_mean(self, round_to=2): + self.mean = 0.5 * (self.a + self.b) + + return round(self.mean, round_to) + + def calculate_stdev(self, round_to=2): + var = (self.b - self.a) / (12 * self.n) + + self.stdev = math.sqrt(var) + + return round(self.stdev, round_to) + + def _fx(self, x): + if x < 0 or x > 1: + value = 0 + else: + g = 0 + for i in range(0, int(self.n * x + 1)): + g += pow(-1, i) * math.comb(self.n, i) * pow(x - i / self.n, self.n - 1) + value = (self.n**self.n / math.factorial(self.n - 1)) * g + return value + + def calculate_pdf(self, x, round_to=2): + self.pdf = self._fx((x - self.a) / (self.b - self.a) + ) / (self.b - self.a) + return round(self.pdf, round_to) + + def calculate_cdf(self, x, round_to=2): + value = 0 + for i in range(0, x + 1): + value += self.calculate_pdf(i) + return round(value, round_to) + + def plot_bar_pdf(self, points=100): + + x = np.linspace(self.a, self.b, num=10**6) + y = (x - self.a) / (self.b - self.a) + + F = np.zeros_like(y) + + for i in range(0, len(y) + 1 // 2): + F[i] = self.calculate_pdf(y[i]) + F[-i - 1] = F[i] # symmetric graph + + plt.plot(x, F, label=f'n={self.n}') + plt.legend() + plt.title(f"Probability Distribution Function for Bates n={self.n}") + plt.show() + return F + + def __repr__(self): + + return "mean {0}, standard deviation {1}, n {2}".format(self.mean, + self.stdev, self.n) diff --git a/probdists/Generaldistribution.py b/probdists/Generaldistribution.py index accce45..d2311c0 100644 --- a/probdists/Generaldistribution.py +++ b/probdists/Generaldistribution.py @@ -59,7 +59,8 @@ def read_data_file(self, file_name, separator='\\n', header=None): 'demo_uniform_data': 'numbers_uniform.txt', 'demo_bernoulli_data': 'numbers_bernoulli.txt', 'demo_triangular_data': 'numbers_triangular.txt', - 'demo_poisson_data': 'numbers_poisson.txt' + 'demo_poisson_data': 'numbers_poisson.txt', + 'demo_bates_data': 'numbers_bates.txt' } if file_name in file_name_map: dirname = Path(__file__).parent.parent.absolute() @@ -79,7 +80,7 @@ def read_data_file(self, file_name, separator='\\n', header=None): for i in df.iterrows(): try: data_list.append(float(df.iat[i[0], 0])) - except: # pylint: disable=W0702 + except Exception: # pylint: disable=W0702 traceback.print_exc() print('Could not convert', df.iat[i[0], 0], ' to int.') else: @@ -103,7 +104,7 @@ def read_data_file(self, file_name, separator='\\n', header=None): for number in line: try: data_list.append(float(number)) - except: # pylint: disable=W0702 + except Exception: # pylint: disable=W0702 traceback.print_exc() print('Could not convert', number, ' to int.') line = file.readline() diff --git a/probdists/Poissondistribution.py b/probdists/Poissondistribution.py index 65c5442..cf0f048 100644 --- a/probdists/Poissondistribution.py +++ b/probdists/Poissondistribution.py @@ -80,7 +80,7 @@ def calculate_cdf(self, x, round_to=2): float: probability density function output """ value = 0 - for i in range(x): + for i in range(0, x + 1): value += self._calc_discrete_pdf(i) return round(value, round_to) diff --git a/probdists/__init__.py b/probdists/__init__.py index ba18515..d8af316 100644 --- a/probdists/__init__.py +++ b/probdists/__init__.py @@ -7,3 +7,4 @@ from .Uniformdistribution import Uniform from .Triangulardistribution import Triangular, TriangularValueException from .Poissondistribution import Poisson +from .Batesdistribution import Bates diff --git a/probdists/numbers_bates.py b/probdists/numbers_bates.py new file mode 100644 index 0000000..e69de29 From 5687b31c0acae8124e31df62d70ef46efed63ad0 Mon Sep 17 00:00:00 2001 From: Saima Date: Sat, 16 Oct 2021 11:17:06 +0530 Subject: [PATCH 11/17] Add: documentation using pydocs --- probdists/Batesdistribution.py | 76 ++++++++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 4 deletions(-) diff --git a/probdists/Batesdistribution.py b/probdists/Batesdistribution.py index d5dbd1a..6d0f5d6 100644 --- a/probdists/Batesdistribution.py +++ b/probdists/Batesdistribution.py @@ -6,8 +6,21 @@ class Bates(Distribution): - def __init__(self, n, a=0, b=1): + def __init__(self, n=20, a=0, b=1): + """ Bates distribution class for calculating and + visualizing a Bates distribution. + Attributes: + + mean (float): the mean value of the distribution + stdev (float): the standard deviation of the distribution + + data (list of floats): extracted from the data file + + n (int): The number of samples + a (int): The lower limit of distribution [Default: 0] + b (int): The upper limit of distribution [Default: 1] + """ self.n = n self.a = a self.b = b @@ -16,11 +29,29 @@ def __init__(self, n, a=0, b=1): self.calculate_stdev()) def calculate_mean(self, round_to=2): + """ Method to calculate the mean from n + + Args: + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: mean of the distribution + """ self.mean = 0.5 * (self.a + self.b) return round(self.mean, round_to) def calculate_stdev(self, round_to=2): + """ Method to calculate the standard deviation from n + + Args: + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: standard deviation of the distribution + """ var = (self.b - self.a) / (12 * self.n) self.stdev = math.sqrt(var) @@ -28,6 +59,12 @@ def calculate_stdev(self, round_to=2): return round(self.stdev, round_to) def _fx(self, x): + """ Internal function to calculate probability density function at a point. + Should not be used by end user. + + Args: + x (int): point for calculating the mean value. + """ if x < 0 or x > 1: value = 0 else: @@ -38,19 +75,45 @@ def _fx(self, x): return value def calculate_pdf(self, x, round_to=2): + """ Probability density function calculator for the Bates distribution. + + Args: + x (float): point for caluclating the probability density function + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: probability density function + """ self.pdf = self._fx((x - self.a) / (self.b - self.a) ) / (self.b - self.a) return round(self.pdf, round_to) def calculate_cdf(self, x, round_to=2): + """ Probability density function calculator for the Bates distribution. + Args: + x (float): point for calculating the probability density function + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: probability density function output + """ value = 0 for i in range(0, x + 1): value += self.calculate_pdf(i) return round(value, round_to) - def plot_bar_pdf(self, points=100): + def plot_bar_pdf(self, samples=10**6): + """ Method to plot the pdf of the Bates distribution. + + Args: + points (int): number of discrete data points - x = np.linspace(self.a, self.b, num=10**6) + Returns: + F (np.array): list of PDFs for samples + """ + x = np.linspace(self.a, self.b, num=samples) y = (x - self.a) / (self.b - self.a) F = np.zeros_like(y) @@ -66,6 +129,11 @@ def plot_bar_pdf(self, points=100): return F def __repr__(self): - + """ Method to output the characteristics of the Bates instace. + Args: + None + Returns: + string: characteristics of the Bates + """ return "mean {0}, standard deviation {1}, n {2}".format(self.mean, self.stdev, self.n) From bc29d950af74f62c3481ee201e292ab3b73623d3 Mon Sep 17 00:00:00 2001 From: Saima Date: Sun, 17 Oct 2021 11:08:27 +0530 Subject: [PATCH 12/17] Adjust rounding errors in test.py --- probdists/Batesdistribution.py | 1 + probdists/Poissondistribution.py | 1 + probdists/numbers_bates.py | 1 + test.py | 50 ++++++++++++++++++++++++++++---- 4 files changed, 47 insertions(+), 6 deletions(-) diff --git a/probdists/Batesdistribution.py b/probdists/Batesdistribution.py index 6d0f5d6..e4ac0df 100644 --- a/probdists/Batesdistribution.py +++ b/probdists/Batesdistribution.py @@ -102,6 +102,7 @@ def calculate_cdf(self, x, round_to=2): value = 0 for i in range(0, x + 1): value += self.calculate_pdf(i) + self.cdf = value return round(value, round_to) def plot_bar_pdf(self, samples=10**6): diff --git a/probdists/Poissondistribution.py b/probdists/Poissondistribution.py index cf0f048..6c02523 100644 --- a/probdists/Poissondistribution.py +++ b/probdists/Poissondistribution.py @@ -82,6 +82,7 @@ def calculate_cdf(self, x, round_to=2): value = 0 for i in range(0, x + 1): value += self._calc_discrete_pdf(i) + self.cdf = value return round(value, round_to) def _calc_discrete_pdf(self, x): diff --git a/probdists/numbers_bates.py b/probdists/numbers_bates.py index e69de29..b1bfa86 100644 --- a/probdists/numbers_bates.py +++ b/probdists/numbers_bates.py @@ -0,0 +1 @@ +0., 0.03448276, 0.06896552, 0.10344828, 0.13793103, 0.17241379, 0.20689655, 0.24137931, 0.27586207, 0.31034483, 0.34482759, 0.37931034, 0.4137931, 0.44827586, 0.48275862, 0.51724138, 0.55172414, 0.5862069, 0.62068966, 0.65517241, 0.68965517, 0.72413793, 0.75862069, 0.79310345, 0.82758621, 0.86206897, 0.89655172, 0.93103448, 0.96551724, 1 diff --git a/test.py b/test.py index f935169..c73c8dd 100644 --- a/test.py +++ b/test.py @@ -9,6 +9,7 @@ from probdists import Uniform from probdists import Triangular, TriangularValueException from probdists import Poisson +from probdists import Bates class TestGeneraldistribution(unittest.TestCase): @@ -195,29 +196,29 @@ def setUp(self): self.poisson.read_data_file('probdists/numbers_poisson.txt') def test_initialization(self): - self.assertEqual(self.poisson.mean, 7.07, 'incorrect mean') - self.assertEqual(self.poisson.stdev, 7.07, + self.assertEqual(round(self.poisson.mean, 2), 7.07, 'incorrect mean') + self.assertEqual(round(self.poisson.stdev, 2), 7.07, 'incoorect standard deviation') def test_readdata(self): - self.assertEqual(self.exponential.data, + self.assertEqual(self.poisson.data, [i for i in range(1, 101)], 'data read incorrectly') def test_meancalculation(self): self.poisson.calculate_mean() - self.assertEqual(self.poisson.mean, + self.assertEqual(round(self.poisson.mean, 2), 7.07, 'calculated mean not as expected') def test_stdevcalculation(self): self.poisson.calculate_stdev() - self.assertEqual(self.poisson.stdev, + self.assertEqual(round(self.poisson.stdev, 2), 7.07, 'calculated standard deviation incorrect') def test_pdf(self): - self.assertEqual(self.poisson.calculate_pdf(50, 5), 0.05632, + self.assertEqual(self.poisson.calculate_pdf(50, 5), 0.05633, 'calculate_pdf function does not give expected result') self.poisson.calculate_mean() self.poisson.calculate_stdev() @@ -240,6 +241,43 @@ def test_cdf(self): 'calculate_cdf does not return expected result after calculating mean and stdev') +class TestBatesClass(unittest.TestCase): + def setUp(self): + self.bates = Bates(n=30) + self.bates.read_data_file('probdists/numbers_bates.txt') + + def test_initialization(self): + self.assertEqual(self.bates.a, 0, 'incorrect initialization of interval start') + self.assertEqual(self.bates.b, 1, 'incorrect initialization of interval end') + + def test_readdata(self): + self.assertEqual(self.bates.data, + [0., 0.03448276, 0.06896552, 0.10344828, 0.13793103, + 0.17241379, 0.20689655, 0.24137931, 0.27586207, 0.31034483, + 0.34482759, 0.37931034, 0.4137931, 0.44827586, 0.48275862, + 0.51724138, 0.55172414, 0.5862069, 0.62068966, 0.65517241, + 0.68965517, 0.72413793, 0.75862069, 0.79310345, 0.82758621, + 0.86206897, 0.89655172, 0.93103448, 0.96551724, 1.], + 'data read incorrectly') + + def test_meancalculation(self): + self.assertEqual(self.bates.mean, 0.5, 'incorrect mean') + + def test_stdev(self): + self.assertEqual(round(self.bates.stdev, 2), 0.05) + + def test_pdf(self): + self.bates.calculate_mean() + self.bates.calculate_stdev() + self.bates.calculate_pdf(0.06896552) + self.assertEqual(self.bates.pdf, 0, 'incorrect pdf') + self.bates.calculate_pdf(0.75862069) + self.assertEqual(self.bates.pdf, 0, 'incorrect pdf') + + def test_cdf(self): + self.assertEqual(self.bates.cdf, 0, 'incorrect cdf') + + class TestUniformClass(unittest.TestCase): def setUp(self): self.uniform = Uniform(0, 10) From 3f038bdcc38a7f75138d3f0af6a63d2d712f87ed Mon Sep 17 00:00:00 2001 From: Saima Date: Sun, 17 Oct 2021 11:11:05 +0530 Subject: [PATCH 13/17] Change file ext --- probdists/{numbers_bates.py => numbers_bates.txt} | 0 probdists/t.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename probdists/{numbers_bates.py => numbers_bates.txt} (100%) create mode 100644 probdists/t.py diff --git a/probdists/numbers_bates.py b/probdists/numbers_bates.txt similarity index 100% rename from probdists/numbers_bates.py rename to probdists/numbers_bates.txt diff --git a/probdists/t.py b/probdists/t.py new file mode 100644 index 0000000..e69de29 From 5cfea20d9ed6392398afef331fae0c618806b709 Mon Sep 17 00:00:00 2001 From: Saima Date: Wed, 20 Oct 2021 16:50:54 +0530 Subject: [PATCH 14/17] Fix: errors and test failures; recalculate assert values manually --- probdists/Batesdistribution.py | 2 +- probdists/numbers_bates.txt | 2 +- test.py | 11 ++++++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/probdists/Batesdistribution.py b/probdists/Batesdistribution.py index e4ac0df..995ad3f 100644 --- a/probdists/Batesdistribution.py +++ b/probdists/Batesdistribution.py @@ -100,7 +100,7 @@ def calculate_cdf(self, x, round_to=2): float: probability density function output """ value = 0 - for i in range(0, x + 1): + for i in range(0, int(x) + 1): value += self.calculate_pdf(i) self.cdf = value return round(value, round_to) diff --git a/probdists/numbers_bates.txt b/probdists/numbers_bates.txt index b1bfa86..4a32a4c 100644 --- a/probdists/numbers_bates.txt +++ b/probdists/numbers_bates.txt @@ -1 +1 @@ -0., 0.03448276, 0.06896552, 0.10344828, 0.13793103, 0.17241379, 0.20689655, 0.24137931, 0.27586207, 0.31034483, 0.34482759, 0.37931034, 0.4137931, 0.44827586, 0.48275862, 0.51724138, 0.55172414, 0.5862069, 0.62068966, 0.65517241, 0.68965517, 0.72413793, 0.75862069, 0.79310345, 0.82758621, 0.86206897, 0.89655172, 0.93103448, 0.96551724, 1 +0.0 0.03448276 0.06896552 0.10344828 0.13793103 0.17241379 0.20689655 0.24137931 0.27586207 0.31034483 0.34482759 0.37931034 0.4137931 0.44827586 0.48275862 0.51724138 0.55172414 0.5862069 0.62068966 0.65517241 0.68965517 0.72413793 0.75862069 0.79310345 0.82758621 0.86206897 0.89655172 0.93103448 0.96551724 1.0 \ No newline at end of file diff --git a/test.py b/test.py index c73c8dd..7dc4b71 100644 --- a/test.py +++ b/test.py @@ -228,16 +228,16 @@ def test_pdf(self): def test_cdf(self): self.assertEqual(self.poisson.calculate_cdf(25), 0.0, 'calculate_cdf does not return expected result') - self.assertEqual(self.poisson.calculate_cdf(50, 5), 0.48119, 'calculate_cdf does not return expected result') + self.assertEqual(self.poisson.calculate_cdf(50, 5), 0.53752, 'calculate_cdf does not return expected result') self.poisson.calculate_mean() self.poisson.calculate_stdev() self.assertEqual(self.poisson.calculate_cdf(60), - 0.91, + 0.93, 'calculate_cdf does not return expected result after calculating mean and stdev') self.assertEqual(self.poisson.calculate_cdf(75, 4), - 0.9994, + 0.9996, 'calculate_cdf does not return expected result after calculating mean and stdev') @@ -270,11 +270,12 @@ def test_pdf(self): self.bates.calculate_mean() self.bates.calculate_stdev() self.bates.calculate_pdf(0.06896552) - self.assertEqual(self.bates.pdf, 0, 'incorrect pdf') + self.assertEqual(round(self.bates.pdf), 0, 'incorrect pdf') self.bates.calculate_pdf(0.75862069) - self.assertEqual(self.bates.pdf, 0, 'incorrect pdf') + self.assertEqual(round(self.bates.pdf), 0, 'incorrect pdf') def test_cdf(self): + self.bates.calculate_cdf(0.5) self.assertEqual(self.bates.cdf, 0, 'incorrect cdf') From 46fd0c369196bc741f08ef4b65668a469202b153 Mon Sep 17 00:00:00 2001 From: Saima Date: Thu, 21 Oct 2021 08:26:20 +0530 Subject: [PATCH 15/17] Delete: dummy file --- probdists/t.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 probdists/t.py diff --git a/probdists/t.py b/probdists/t.py deleted file mode 100644 index e69de29..0000000 From a0a043d3f67df7c022ac82c6954a05f5e80c03fc Mon Sep 17 00:00:00 2001 From: Saima Date: Fri, 22 Oct 2021 21:23:38 +0530 Subject: [PATCH 16/17] Custom combination function for backward compatibility --- probdists/Batesdistribution.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/probdists/Batesdistribution.py b/probdists/Batesdistribution.py index 995ad3f..81be50d 100644 --- a/probdists/Batesdistribution.py +++ b/probdists/Batesdistribution.py @@ -70,10 +70,17 @@ def _fx(self, x): else: g = 0 for i in range(0, int(self.n * x + 1)): - g += pow(-1, i) * math.comb(self.n, i) * pow(x - i / self.n, self.n - 1) + g += pow(-1, i) * self._comb(self.n, i) * pow(x - i / self.n, self.n - 1) value = (self.n**self.n / math.factorial(self.n - 1)) * g return value + def _comb(self, n, k): + """Protected function to calculate nCk + math.comb(n,k) was added in Python v3.8 + Hence, for backward compatibility with earlier versions + """ + return math.factorial(n) / (math.factorial(n - k) * math.factorial(k)) + def calculate_pdf(self, x, round_to=2): """ Probability density function calculator for the Bates distribution. From f9867fe5e8520a537d1eb0290dd059f1343c4c5e Mon Sep 17 00:00:00 2001 From: Saima Date: Sat, 23 Oct 2021 08:25:45 +0530 Subject: [PATCH 17/17] Change: _comb from method to function --- probdists/Batesdistribution.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/probdists/Batesdistribution.py b/probdists/Batesdistribution.py index 81be50d..8cf6444 100644 --- a/probdists/Batesdistribution.py +++ b/probdists/Batesdistribution.py @@ -70,17 +70,10 @@ def _fx(self, x): else: g = 0 for i in range(0, int(self.n * x + 1)): - g += pow(-1, i) * self._comb(self.n, i) * pow(x - i / self.n, self.n - 1) + g += pow(-1, i) * _comb(self.n, i) * pow(x - i / self.n, self.n - 1) value = (self.n**self.n / math.factorial(self.n - 1)) * g return value - def _comb(self, n, k): - """Protected function to calculate nCk - math.comb(n,k) was added in Python v3.8 - Hence, for backward compatibility with earlier versions - """ - return math.factorial(n) / (math.factorial(n - k) * math.factorial(k)) - def calculate_pdf(self, x, round_to=2): """ Probability density function calculator for the Bates distribution. @@ -145,3 +138,11 @@ def __repr__(self): """ return "mean {0}, standard deviation {1}, n {2}".format(self.mean, self.stdev, self.n) + + +def _comb(n, k): + """Protected function to calculate nCk + math.comb(n,k) was added in Python v3.8 + Hence, for backward compatibility with earlier versions + """ + return math.factorial(n) / (math.factorial(n - k) * math.factorial(k))