Skip to content

Commit

Permalink
General Rollback to pagination funxtion update
Browse files Browse the repository at this point in the history
  • Loading branch information
algapster committed Jun 13, 2024
1 parent 0c0b532 commit 2c70a3c
Show file tree
Hide file tree
Showing 4 changed files with 217 additions and 26 deletions.
5 changes: 2 additions & 3 deletions rss_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@
"channels": {
"cve": os.getenv("SLACK_CHANNEL_CVE"),
"news": os.getenv("SLACK_CHANNEL_NEWS"),
"error": os.getenv("SLACK_CHANNEL_ERRORS")
"pages_to_read": os.getenv("SLACK_PAGES_TO_READ", 5)

"pages_to_read": int(os.getenv("SLACK_PAGES_TO_READ")),
"error": os.getenv("SLACK_CHANNEL_ERRORS")
}
}

Expand Down
2 changes: 1 addition & 1 deletion rss_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
}

keywords = {
"last_modified": "2024-04-11",
"last_modified": "2024-06-13",
"ignored": [
"hiring"
],
Expand Down
212 changes: 212 additions & 0 deletions slacker/import logging1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
import logging
import re
from bs4 import BeautifulSoup
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
# Requires adding slack_sdk to the requirements.txt file
# Requires adding bs4 to the requirements.txt file
# Requires adding lxml to the requirements.txt file
# Set module logger name
logger = logging.getLogger(__name__)
def init_slack_client(slack_token):
"""
Instantiates a Slack web client that can call API methods
:param slack_token: Slack API token
:return: Slack Client Object
"""
import logging
import re
from bs4 import BeautifulSoup
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
# Requires adding slack_sdk to the requirements.txt file
# Requires adding bs4 to the requirements.txt file
# Requires adding lxml to the requirements.txt file
# Set module logger name
logger = logging.getLogger(__name__)
def init_slack_client(slack_token):
"""
Instantiates a Slack web client that can call API methods
:param slack_token: Slack API token
:return: Slack Client Object
"""
return WebClient(token=slack_token)


def read_channel(client, channel_id, rss_type):
def read_channel(client, channel_id, rss_type, pages_to_read):
"""
Reads channel conversations and returns matching content
This requires the following scopes:
channels:history
View messages and other content in public channels that syphon has been added to
groups:history
View messages and other content in private channels that syphon has been added to
im:history
View messages and other content in direct messages that syphon has been added to
incoming-webhook
Post messages to specific channels in Slack
mpim:history
View messages and other content in group direct messages that syphon has been added to
:param client: Slack Client Object
:param channel_id: Slack Channel ID
:param rss_type: CVE or NEWs job type
:return: Dictionary of content
"""
# Set default return dict
re_dict = {
"links": [],
"md5s": [],
"fixed_cves": [],
"seen_cves": []
This requires the following scopes:
channels:history
View messages and other content in public channels that syphon has been added to
groups:history
View messages and other content in private channels that syphon has been added to
im:history
View messages and other content in direct messages that syphon has been added to
incoming-webhook
Post messages to specific channels in Slack
mpim:history
View messages and other content in group direct messages that syphon has been added to
:param client: Slack Client Object
:param channel_id: Slack Channel ID
:param rss_type: CVE or NEWs job type
:return: Dictionary of content
"""
# Set default return dict
re_dict = {
"links": [],
"md5s": [],
"fixed_cves": [],
"seen_cves": []
}
try:
# Call the conversations.history method using the WebClient
# The conversations.history returns 99 messages by default
# Results are paginated, see: https://api.slack.com/method/conversations.history$pagination
# TODO handle paginating multiple pages
conversation_history = []
result = client.conversations_history(channel=channel_id)
conversation_history = result["messages"]
conversation_history.extend(result["messages"])
while result["response_metadata"]["next_cursor"] is not None and pages_to_read > 0:
result = client.conversations_history(channel=channel_id, cursor=result["response_metadata"]["next_cursor"])
conversation_history.extend(result["messages"])
pages_to_read = pages_to_read - 1
# Initialize dict and lists for storing links/md5s
re_link = []
link_regex = r"(?:link\:.+?)(https?:\/\/(?:www\.)?[-a-zA-Z-1-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*))"
re_results = re.findall(link_regex, str(conversation_history), re.IGNORECASE)
for re_result in re_results:
if re_result not in re_link:
re_link.append(re_result)
re_md5 = []
md5_regex = r"(?:md5:\s)([a-f0-9]{32})"
re_results = re.findall(md5_regex, str(conversation_history), re.IGNORECASE)
for re_result in re_results:
if re_result not in re_md5:
re_md5.append(re_result)
already_fixed_list = []
already_seen_list = []
# Save timestamp if cve
if rss_type == "cve":
cve_regex = r"(CVE-20[0-9]{2}-\d+)"
for dialog in conversation_history:
if "reactions" in dialog:
if list(filter(lambda item: item['name'] == 'white_check_mark', dialog["reactions"])):
cve_dialog_results = re.findall(cve_regex, str(dialog), re.IGNORECASE)
for dialog_result in cve_dialog_results:
if dialog_result not in already_fixed_list:
already_fixed_list.append(dialog_result)
cve_convo_results = re.findall(cve_regex, str(conversation_history), re.IGNORECASE)
for convo_result in cve_convo_results:
if convo_result not in already_seen_list:
already_seen_list.append(convo_result)
re_dict = {
"links": re_link,
"md5s": re_md5,
"fixed_cves": already_fixed_list,
"seen_cves": already_seen_list
}
except SlackApiError as e:
msg = f"Error creating conversation: {e}"
logger.error(msg)
return re_dict
def post_message(client, channel_id, messages):
"""
This requires the following scopes:
chat:write:bot
Send messages as @syphon
:param client: Slack Client Object
:param channel_id: Slack Channel ID
:param messages: Message body content
"""
# messages = message_body.split('\n\n\n\n')
for message in messages.split('\n---EOM---'):
if message:
try:
# Call the chat.postMessage method using the WebClient
result = client.chat_postMessage(
channel=channel_id,
text=message,
unfurl_links=False,
unfurl_media=False,
parse="mrkdwn"
)
logger.info(result)
except SlackApiError as e:
msg = f"Error posting message: {e}"
logger.error(msg)
def clean_html(input_text):
"""
Summaries often come as html formatted.
This def uses bs4 to clean that up.
:param input_text: Text to clean
:return: Cleaned output
"""
text = BeautifulSoup(input_text, "lxml").get_text(separator="\n")
return re.sub('\n\n', '\n', text)
def build_results_message(feed_results, rss_found_already, rss_type):
"""
Build message which will be used as the content body
:param feed_results: Full list of processed rss posts
:param rss_found_already: Filter for RSS articles found in Slack channel
:param rss_type: Limited to News or CVE type articles
:return: Message body content
"""
res = ""
if feed_results["articles"]:
for rss_post in feed_results["articles"]:
if rss_post['md5'] in rss_found_already['md5s']:
continue
elif rss_post['link'] in rss_found_already['links']:
continue
elif rss_post['md5'] not in res:
post_title = rss_post["title"].lower()
post_summary = rss_post["summary"].lower()
# Publishing News
if rss_type == "news":
if not any(x in post_title for x in ["cve", "vulnerability"]):
res += f"\n{rss_post['title']}\n"
res += f" • link: {rss_post['link']}\n"
res += f" • md5: {rss_post['md5']}\n"
res += f" • keyword(s): {rss_post['keywords']}\n"
res += f" • feed: {rss_post['rss_feed_name']}\n"
res += f"---EOM---"
# Publishing CVEs
elif rss_type == "cve":
if ("cve" in post_title) or ("cve" in post_summary):
# Parse for CVEs
cve_list = []
cve_url_list = []
cve_regex = r"(CVE-20[0-9]{2}-\d+)"
cve_title_results = re.findall(cve_regex, str(rss_post['title']), re.IGNORECASE)
cve_summary_results = re.findall(cve_regex, str(rss_post['summary']), re.IGNORECASE)
# Check CVE lists and dedup results and readies for results
for title_result in cve_title_results:
if title_result not in cve_list:
24 changes: 2 additions & 22 deletions slacker/slacker.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ def init_slack_client(slack_token):

def read_channel(client, channel_id, rss_type, pages_to_read):



"""
Reads channel conversations and returns matching content
Expand Down Expand Up @@ -55,20 +53,15 @@ def read_channel(client, channel_id, rss_type, pages_to_read):
}

try:
# Convert pages_to_read to an integer if it's not already

conversation_history = []
result = client.conversations_history(channel=channel_id)
conversation_history.extend(result["messages"])

while result["response_metadata"]["next_cursor"] is not None and pages_to_read > 0:
result = client.conversations_history(channel=channel_id, cursor=result["response_metadata"]["next_cursor"])
conversation_history.extend(result["messages"])
pages_to_read = pages_to_read - 1

# Safely update next_cursor
next_cursor = result.get("response_metadata", {}).get("next_cursor")

# Process extracted messages to find links and MD5 hashes
re_link = []
link_regex = re.compile(r"(?:link\:.+?)(https?:\/\/(?:www\.)?[-a-zA-Z-1-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*))", re.IGNORECASE)
Expand Down Expand Up @@ -114,11 +107,6 @@ def read_channel(client, channel_id, rss_type, pages_to_read):
except SlackApiError as e:
msg = f"Error fetching conversation data: {e.response.get('error', 'Unknown error')}"
logger.error(msg)
return re_dict
except KeyError as e:
# This catches missing keys in the result dictionary
logger.error(f"Key error: {e} - likely missing in the API response.")
return re_dict

return re_dict

Expand Down Expand Up @@ -255,7 +243,6 @@ def send_message(job_type, message_params, matched, errors, check_stale_keywords
:param errors: List of feeds that have an error
:param check_stale_keywords: None or date
"""

# Check if module is enabled and bail out if not
if str(message_params["slack_enabled"]).lower() == "false":
logger.debug("Debug: Slack not enabled.")
Expand All @@ -265,20 +252,13 @@ def send_message(job_type, message_params, matched, errors, check_stale_keywords
slack_channel = message_params["channels"]
pages_to_read = message_params["pages_to_read"]

try:
pages_to_read = int(message_params["pages_to_read"]) # Convert pages_to_read to integer
except ValueError:
logger.error(f"pages_to_read should be an integer, got: {message_params['pages_to_read']}")
return # Exit the function if conversion fails


# Check if slack_token is set
if slack_token:
# Init Slack Client
slack_client = init_slack_client(slack_token)

# Pull RSS that was found already in channel
rss_found = read_channel(slack_client, slack_channel[job_type], job_type, pages_to_read)
rss_found = read_channel(slack_client, slack_channel[job_type], job_type, pages_to_read)

# Build the message that will be sent
message_body = build_results_message(matched, rss_found, job_type)
Expand Down

0 comments on commit 2c70a3c

Please sign in to comment.