diff --git a/WASEHTMLParser.py b/WASEHTMLParser.py
index 59226b7..c7cd481 100644
--- a/WASEHTMLParser.py
+++ b/WASEHTMLParser.py
@@ -8,6 +8,15 @@
def add_attrs(attrNames, attrList):
return [a[1] for a in filter(lambda attr: attr[0] in attrNames, attrList)]
+def has_attr(attrs, attr):
+ return attr in map(lambda kv: kv[0], attrs)
+
+def attr_val_is(attrs, attr, val):
+ try:
+ return filter(lambda kv: kv[0] == attr, attrs)[0][1] == val
+ except:
+ return False
+
class WASEHTMLParser(HTMLParser, object):
def reset(self):
self.doctype = set()
@@ -28,33 +37,33 @@ def handle_decl(self, decl):
def handle_starttag(self, tag, attrs):
if tag == "iframe":
- self.frames = self.frames.union(add_attrs(["src"], attrs))
+ self.frames.update(add_attrs(["src"], attrs))
elif tag == "base":
- self.base = self.base.union(add_attrs(["href"], attrs))
- elif tag == "link" and "rel" in attrs and attrs["rel"] == "stylesheet":
- self.stylesheets = self.stylesheets.union(add_attrs(["href"], attrs))
+ self.base.update(add_attrs(["href"], attrs))
+ elif tag == "link" and attr_val_is(attrs, "rel", "stylesheet"):
+ self.stylesheets.update(add_attrs(["href"], attrs))
elif tag == "script":
- self.scripts = self.scripts.union(add_attrs(["src"], attrs))
+ self.scripts.update(add_attrs(["src"], attrs))
elif tag == "a" or tag == "area":
- self.links = self.links.union(add_attrs(["href"], attrs))
+ self.links.update(add_attrs(["href"], attrs))
elif tag == "img" or tag == "input":
- self.images = self.images.union(add_attrs(["src"], attrs))
+ self.images.update(add_attrs(["src"], attrs))
elif tag == "svg" or tag == "image":
- self.images = self.images.union(add_attrs(["href", "xlink:href"], attrs))
+ self.images.update(add_attrs(["href", "xlink:href"], attrs))
elif tag == "audio":
- self.audio = self.audio.union(add_attrs(["src"], attrs))
+ self.audio.update(add_attrs(["src"], attrs))
elif tag == "video":
- self.video = self.video.union(add_attrs(["src"], attrs))
+ self.video.update(add_attrs(["src"], attrs))
elif tag == "object":
- self.objects = self.objects.union(add_attrs(["data"], attrs))
+ self.objects.update(add_attrs(["data"], attrs))
elif tag == "embed":
- self.objects = self.objects.union(add_attrs(["src"], attrs))
+ self.objects.update(add_attrs(["src"], attrs))
elif tag == "applet":
- self.objects = self.objects.union(add_attrs(["code"], attrs))
+ self.objects.update(add_attrs(["code"], attrs))
elif tag == "form":
- self.formactions = self.formactions.union(add_attrs(["action"], attrs))
+ self.formactions.update(add_attrs(["action"], attrs))
elif tag == "input" or tag == "button":
- self.formactions = self.formactions.union(add_attrs(["formaction"], attrs))
+ self.formactions.update(add_attrs(["formaction"], attrs))
else:
return
diff --git a/elasticsearch-py b/elasticsearch-py
index 1c7b23e..51defea 160000
--- a/elasticsearch-py
+++ b/elasticsearch-py
@@ -1 +1 @@
-Subproject commit 1c7b23e2141c02e24a670fd93b033faf62943c8f
+Subproject commit 51defea8c9e1f5d664c879071d027cd5761630ab