Skip to content

Commit

Permalink
version 0.2.2
Browse files Browse the repository at this point in the history
- Added ignore_inline_image regular expression to ignore useless images
- fixed UID data type from int to string #6
  • Loading branch information
awalon committed Aug 15, 2022
1 parent b581f59 commit 042e2ef
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 37 deletions.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,18 @@ Expectation: Forwarded content will be smaller than this value.
# max length (characters) of forwarded mail content
#max_length: 2000
```

`ignore_inline_image`: Ignore embedded image(s) whose source (`src`) attribute matches the given regular expression.

**Example**: Remove a 1x1 pixel image used for layout, matched by its file name, using
`ignore_inline_image: (spacer\.gif)`:
`<img src="http://img.mitarbeiterangebote.de/images/newsletter/spacer.gif"/>`

```
# ignore inline image by regular expression
#ignore_inline_image: (spacer\.gif)
```

#### Telegram
`bot_token`: When the bot is registered via [@botfather](https://telegram.me/botfather)
it will get a unique and long token. Enter this token here (ex.: `123456789:djc28e398e223lkje`).
Expand Down
3 changes: 3 additions & 0 deletions conf/mailToTelegramForwarder.conf
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ server: <IMAP mail server>
# max length (characters) of forwarded mail content
#max_length: 2000

# ignore inline image by regular expression
#ignore_inline_image: (spacer\.gif)

# Not yet available:
# # use IMAP IDLE (push) mode [True|False]
# push_mode: False
Expand Down
82 changes: 45 additions & 37 deletions mailToTelegramForwarder.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
"""

__appname__ = "Mail to Telegram Forwarder"
__version__ = "0.2.1"
__version__ = "0.2.2"
__author__ = "Awalon (https://github.com/awalon)"

with warnings.catch_warnings(record=True) as w:
Expand Down Expand Up @@ -139,6 +139,7 @@ class Config:
imap_max_length = 2000
imap_read_old_mails = False
imap_read_old_mails_processed = False
imap_ignore_inline_image = ''

tg_bot_token = None
tg_forward_to_chat_id = None
Expand Down Expand Up @@ -175,6 +176,8 @@ def __init__(self, tool, cmd_args):
self.imap_search = self.get_config('Mail', 'search', self.imap_search)
self.imap_mark_as_read = self.get_config('Mail', 'mark_as_read', self.imap_mark_as_read, bool)
self.imap_max_length = self.get_config('Mail', 'max_length', self.imap_max_length, int)
self.imap_ignore_inline_image = self.get_config('Mail', 'ignore_inline_image',
self.imap_ignore_inline_image)

self.tg_bot_token = self.get_config('Telegram', 'bot_token', self.tg_bot_token)
tool.mask_error_data.append(self.tg_bot_token)
Expand Down Expand Up @@ -312,8 +315,7 @@ class TelegramBot:
def __init__(self, config: Config):
self.config = config

@staticmethod
def cleanup_html(message: str, images: typing.Optional[dict[str, MailAttachment]] = None) -> str:
def cleanup_html(self, message: str, images: typing.Optional[dict[str, MailAttachment]] = None) -> str:
"""
Parse HTML message and remove HTML elements not supported by Telegram
"""
Expand Down Expand Up @@ -351,7 +353,7 @@ def cleanup_html(message: str, images: typing.Optional[dict[str, MailAttachment]
image_seen: dict[str] = {}
for match in re.finditer(
r'(?P<img><\s*img\s+[^>]*?\s*src\s*=\s*"'
r'(?P<src>(?P<proto>(cid|https?):/*(?P<cid>[^"]*)))"[^>]*?/?\s*>)',
r'(?P<src>(?P<proto>(cid|https?)):/*(?P<cid>[^"]*))"[^>]*?/?\s*>)',
tg_body,
flags=(re.DOTALL | re.MULTILINE | re.IGNORECASE)):
img: str = match.group('img')
Expand All @@ -364,6 +366,9 @@ def cleanup_html(message: str, images: typing.Optional[dict[str, MailAttachment]
if 'http' in proto:
# web link
src = match.group('src')
if self.config.imap_ignore_inline_image \
and re.search(self.config.imap_ignore_inline_image, src, re.IGNORECASE):
continue
tg_body = tg_body.replace(img, "${img-link:%s|%s}" % (src, alt))
else:
# attached/embedded image
Expand Down Expand Up @@ -501,8 +506,9 @@ def send_message(self, mails: [MailData]):
text=message,
disable_web_page_preview=False)

logging.info("Mail summary for '%s' was sent with ID '%i' to '%s' (ID: '%i')"
% (mail.mail_subject, tg_message.message_id,
logging.info("Mail summary for '%s' (UID: '%s') was sent"
" with message ID '%i' to '%s' (ID: '%i')"
% (mail.mail_subject, mail.uid, tg_message.message_id,
tg_chat_title, self.config.tg_forward_to_chat_id))

if self.config.tg_forward_attachment and len(mail.attachments) > 0:
Expand Down Expand Up @@ -726,7 +732,7 @@ def decode_body(msg) -> MailBody:
body.images = images
return body

def get_last_uid(self):
def get_last_uid(self) -> str:
"""
get UID of most recent mail
"""
Expand Down Expand Up @@ -799,10 +805,10 @@ def parse_mail(self, uid, mail) -> (MailData, None):
max_len = self.config.imap_max_length
content_len = len(content)
if message_type == MailDataType.HTML and content_len > 0:
# get length from parsed HTML (all tags removed)
content_plain = re.sub(r'<[^>]*>', '', content, flags=re.MULTILINE)
# get length of parsed HTML (all tags and masked images (ex.: '${<image>|<title>}') removed)
content_plain: str = re.sub(r'(<[^>]*>)|(\${[^}]+})', '', content, flags=re.MULTILINE)
# get new max length based on plain text factor
plain_factor = (len(content_plain) / content_len) + float(1)
plain_factor: float = (len(content_plain) / content_len) + float(1)
max_len = int(max_len * plain_factor)
if content_len > max_len:
content = content[:max_len]
Expand Down Expand Up @@ -882,7 +888,7 @@ def search_mails(self) -> [MailData]:
"""
if self.last_uid is None or self.last_uid == '':
self.last_uid = self.get_last_uid()
logging.info("Most recent UID: %s" % self.last_uid)
logging.info("Most recent UID: '%s'" % self.last_uid)

# build IMAP search string
search_string = self.config.imap_search
Expand Down Expand Up @@ -919,43 +925,45 @@ def search_mails(self) -> [MailData]:
mails = []
if self.config.imap_read_old_mails and not self.config.imap_read_old_mails_processed:
# ignore current/max UID during first loop
max_num = 0
max_uid = ''
# don't repeat this on next loops
self.config.imap_read_old_mails = False
logging.info('Ignore max UID %s, as old mails have to be processed first...' % self.last_uid)
logging.info("Ignore most recent UID '%s', as old mails have to be processed first..." % self.last_uid)
else:
max_num = int(self.last_uid)
max_uid = self.last_uid
if not self.config.imap_read_old_mails_processed:
self.config.imap_read_old_mails_processed = True
logging.info('Reading mails having UID greater than %s...' % self.last_uid)
logging.info("Reading mails having UID more recent than '%s', using search: '%s'"
% (self.last_uid, search_string))

for num in sorted(data[0].split()):
current_uid = int(self.config.tool.binary_to_string(num))
for cur_uid in sorted(data[0].split()):
current_uid = self.config.tool.binary_to_string(cur_uid)

if current_uid > max_num:
try:
rv, data = self.mailbox.uid('fetch', num, '(RFC822)')
if rv != 'OK':
logging.error("ERROR getting message: %s" % num)
return

msg_raw = data[0][1]
mail = self.parse_mail(self.config.tool.binary_to_string(num), msg_raw)
if mail is None:
logging.error("Can't parse mail with UID: %s" % num)
else:
mails.append(mail)
try:
rv, data = self.mailbox.uid('fetch', cur_uid, '(RFC822)')
if rv != 'OK':
logging.error("ERROR getting message: %s" % current_uid)
return

msg_raw = data[0][1]
mail = self.parse_mail(current_uid, msg_raw)
if mail is None:
logging.error("Can't parse mail with UID: '%s'" % current_uid)
else:
logging.info("Parsed mail with UID '%s': '%s'" % (current_uid, mail.mail_subject))
mails.append(mail)

except Exception as mail_error:
logging.critical("Cannot process mail: %s" % ', '.join(map(str, mail_error.args)))
except Exception as mail_error:
logging.critical("Cannot process mail with UID '%s': %s" % (current_uid,
', '.join(map(str, mail_error.args))))

finally:
# remember new UID for next loop
max_num = current_uid
finally:
# remember new UID for next loop
max_uid = current_uid

if len(mails) > 0:
self.last_uid = str(max_num)
logging.info("Got %i new mail(s) to forward, changed UID to %s" % (len(mails), self.last_uid))
self.last_uid = max_uid
logging.info("Got %i new mail(s) to forward, using most recent UID: '%s'" % (len(mails), self.last_uid))
return mails


Expand Down

0 comments on commit 042e2ef

Please sign in to comment.