# capstone_buoys.py
# coding: utf-8
# # Analyzing data from the National Data Buoy Center
# ## Background
#
# The National Data Buoy Center publishes oceanographic data from its buoys on the web. Buoys send in their measurements every hour, and the data are published as plain-text tables on the NOAA web site.
#
# We want to collect the data for one buoy, or many, clean it up, and analyze it to make a plot.
#
# ## Challenges
#
# - Get the data from the web and read it into our Python notebook.
# - Clean up the data, which sometimes has missing measurements.
# - Choose the buoys we want to analyze using their ID numbers (see the URL helper sketch after the references).
# - Run our analysis (we will just make a plot).
# - Save our code as a script or module so we can reuse it later.
# ## References
#
# - National Data Buoy Center: http://www.ndbc.noaa.gov/
# - Data for Station 44255 - NE Burgeo Bank: http://www.ndbc.noaa.gov/station_realtime.php?station=44255
# - How a buoy gets an ID: http://www.ndbc.noaa.gov/staid.shtml
# - All the station IDs: http://www.ndbc.noaa.gov/to_station.shtml
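# A station's ID is all we need to build its realtime data URL, which is how
# we can pick buoys to analyze by ID. A minimal sketch (the helper name
# realtime_url is ours, not an NDBC API; it assumes the realtime2 layout of
# the commented-out URL in the data-retrieval cell below):

def realtime_url(station_id):
    """Return the realtime text-file URL for a given NDBC station ID."""
    return 'http://www.ndbc.noaa.gov/data/realtime2/%s.txt' % station_id

# e.g. realtime_url('44255') rebuilds the Station 44255 URL from the references.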
# In[96]:
# Imports are gathered here so the whole notebook (and the saved module) can reuse them.
from io import StringIO            # wrap the downloaded text so pandas can read it like a file
import pandas as pd                # data processing (think Excel); 'pd' is the usual short alias
import requests                    # fetch data from the web
import numpy as np
from numpy import nan
import matplotlib.pyplot as plt
# In[82]:
# Retrieve data
# url = 'http://www.ndbc.noaa.gov/data/realtime2/44255.txt'
url = 'http://www.ndbc.noaa.gov/view_text_file.php?filename=44011h2012.txt.gz&dir=data/historical/stdmet/'
response = requests.get(url)
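# The URL above points at the archived 2012 standard-met file for station
# 44011 rather than the realtime feed. Before parsing it is worth checking
# that the download succeeded (a small addition to the original notebook):
response.raise_for_status()   # raises an HTTPError for 4xx/5xx responses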
# In[83]:
# type(response.text)
# In[84]:
data_str = StringIO(response.text)   # wrap the downloaded text in a file-like object for pandas
# In[85]:
data = pd.read_csv(data_str,
                   sep=r'\s+',                  # columns are separated by whitespace
                   skiprows=[1, 2],
                   usecols=[0, 1, 2, 3, 6, 8])
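# A note on the column indices above (based on the header line of the
# standard-met files, which pandas keeps as column names): 0-3 are year,
# month, day and hour, 6 is WSPD (wind speed) and 8 is WVHT (significant
# wave height); only WSPD and WVHT are used in the analysis below.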
# In[126]:
# Clean up missing measurements ('MM' is the missing-value marker in the
# realtime feed) and drop any rows left incomplete.
data = data.replace('MM', nan)
data = data.replace('99', nan)
data = data.dropna(axis=0)
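# In the historical files missing measurements appear as numeric sentinels
# such as 99.0 or 999.0 rather than the string 'MM' used by the realtime
# feed, so the string replacements above can leave them in place. An extra,
# hedged cleanup step (the sentinel list is an assumption about this file):
data = data.replace([99.0, 999.0], nan).dropna(axis=0)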
# Fit a quadratic to wave height as a function of wind speed and plot it.
z = np.polyfit(data.WSPD, data.WVHT, 2)
p = np.poly1d(z)
wspd = range(20)
plt.plot(wspd, p(wspd))
# The fitted coefficients, highest power first:
# print("y = %.6f x^2 + %.6f x + %.6f" % (z[0], z[1], z[2]))
# import scipy.signal
# detrended = scipy.signal.detrend(data.WVHT)  # detrend() takes a single series, not an x/y pair
# In[110]:
get_ipython().run_line_magic('matplotlib', 'inline')
plt.plot(data.WSPD, data.WVHT, '*')
# data.WSPD.plot()
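# Axis labels make the scatter easier to read (a small addition; the units
# follow NDBC's standard-met convention of m/s for WSPD and metres for WVHT):
plt.xlabel('Wind speed WSPD (m/s)')
plt.ylabel('Significant wave height WVHT (m)')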
# In[88]:
print(data.WSPD.mean())   # average wind speed over the cleaned-up record
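# A slightly fuller summary of the two measurements (a small addition):
print(data[['WSPD', 'WVHT']].describe())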
# In[89]:
# The rendered notebook can be shared via nbviewer (nbviewer.org).
# In[90]:
import capstone_buoys  # after saving this notebook as capstone_buoys.py in the working directory, it can be imported and reused as a module
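# One way to keep the saved module import-friendly, so the download and plots
# do not rerun on every import (a sketch; fetch_buoy_data is a hypothetical
# helper that reuses the imports at the top of this file):

def fetch_buoy_data(data_url):
    """Download a whitespace-delimited NDBC text file into a DataFrame."""
    text = requests.get(data_url).text
    frame = pd.read_csv(StringIO(text), sep=r'\s+',
                        skiprows=[1, 2], usecols=[0, 1, 2, 3, 6, 8])
    return frame.replace('MM', nan).dropna(axis=0)

if __name__ == '__main__':
    # Runs only when executed as a script, not when imported as a module.
    buoy_data = fetch_buoy_data(url)
    plt.plot(buoy_data.WSPD, buoy_data.WVHT, '*')
    plt.show()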