
Commit

Merge pull request #47 from c2corg/polish-clean-rc
Fine polish clean recent changes
cbeauchesne authored Feb 18, 2022
2 parents eeea9bd + d19b7ef commit f2683e7
Showing 6 changed files with 79 additions and 47 deletions.
14 changes: 9 additions & 5 deletions campbot/__main__.py
@@ -2,9 +2,9 @@
CampBot, Python bot framework for camptocamp.org
Usage:
campbot clean_rc <days> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch]
campbot clean_rc <days> <lang> <thread_url> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch]
campbot report_rc <days> <lang> <thread_url> [--login=<login>] [--password=<password>] [--delay=<seconds>]
campbot clean <url_or_file> <langs> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch] [--bbcode]
campbot clean <url_or_file> <lang> <thread_url> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch] [--bbcode]
campbot report <url_or_file> <lang> [--login=<login>] [--password=<password>] [--delay=<seconds>]
campbot contribs [--out=<filename>] [--starts=<start_date>] [--ends=<end_date>] [--delay=<seconds>]
campbot export <url> [--out=<filename>] [--delay=<seconds>]
@@ -28,7 +28,7 @@
filename is also accepted, and must be like :
123 | r
456 | w
<langs> is comma-separated lang identifiers, like fr,de for french and german.
<lang> is a lang identifier, like fr for french.
report Make quality report on documents.
contribs Export all contribution in a CSV file. <start_date> and <end_date> are like 2018-05-12
export Export all documents in a CSV file.
@@ -91,7 +91,10 @@ def main(args):

elif args["clean_rc"]:
get_campbot(args).clean_recent_changes(
days=int(args["<days>"]), lang="fr", ask_before_saving=not args["--batch"],
days=float(args["<days>"]),
lang=args["<lang>"],
ask_before_saving=not args["--batch"],
thread_url=args["<thread_url>"],
)

elif args["report"]:
@@ -102,8 +105,9 @@
elif args["clean"]:
get_campbot(args).clean(
args["<url_or_file>"],
langs=args["<langs>"].split(","),
lang=args["<lang>"],
ask_before_saving=not args["--batch"],
thread_url=args["<thread_url>"],
clean_bbcode=args["--bbcode"],
)

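For illustration only (not part of this commit): a minimal sketch of how the revised usage string in campbot/__main__.py maps the new positional arguments onto the values consumed in main(), assuming the docopt library that the usage-block format implies. The thread URL and credentials below are hypothetical.

from docopt import docopt  # assumption: campbot's CLI parses its docstring with docopt

# Hypothetical, trimmed usage string matching the new clean_rc line above.
usage = """
Usage:
  campbot clean_rc <days> <lang> <thread_url> [--login=<login>] [--password=<password>] [--batch]
"""

# Example invocation: clean the last 2 days of French recent changes in batch mode.
args = docopt(usage, argv=["clean_rc", "2", "fr", "https://forum.camptocamp.org/t/12345", "--batch"])

days = float(args["<days>"])            # now parsed as float, so fractional day windows are accepted
lang = args["<lang>"]                   # "fr"
thread_url = args["<thread_url>"]       # forum thread that receives the summary report
ask_before_saving = not args["--batch"] # False when --batch is given
print(days, lang, thread_url, ask_before_saving)
# -> 2.0 fr https://forum.camptocamp.org/t/12345 False
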
1 change: 0 additions & 1 deletion campbot/checkers.py
@@ -202,7 +202,6 @@ def report_recent_changes(bot, days, lang, thread_url):
tests = get_fixed_tests(lang)
tests += get_re_tests(bot.forum.get_post(url=thread_url), lang)

logging.info(f"Get modified documents from {oldest_date} to {newest_date}")
items = bot.get_modified_documents(
lang=lang, oldest_date=oldest_date, newest_date=newest_date
).values()
96 changes: 62 additions & 34 deletions campbot/core.py
@@ -7,7 +7,7 @@
import requests
from datetime import datetime, timedelta
from dateutil import parser
from collections import OrderedDict
from collections import OrderedDict, defaultdict
import pytz
import logging
import time
@@ -523,24 +523,24 @@ def _get_documents_from_url(self, url):
constructor, filters = _parse_filter(url)
return self.wiki.get_documents(filters, constructor=constructor)

def clean(self, url_or_filename, langs, ask_before_saving=True, clean_bbcode=False):
def clean(
self, url_or_filename, lang, ask_before_saving, thread_url, clean_bbcode=False
):
"""
Clean a set of document.
:param url_or_filename: Camptocamp.org URL, or filename
:param langs: comma-separated list of lang identifiers
:param lang: lang identifier
:param ask_before_saving: Boolean
:param clean_bbcode: Boolean
"""

assert len(langs) != 0

documents = self.get_documents(url_or_filename)
processors = get_automatic_replacments(self, clean_bbcode)
report_header = f"Clean documents from `{url_or_filename}`"

self._process_documents(
documents, processors, langs, ask_before_saving, excluded_ids=[996571,]
documents, lang, ask_before_saving, report_header, thread_url, clean_bbcode
)

def report(self, url_or_filename, lang):
@@ -581,48 +581,77 @@ def report(self, url_or_filename, lang):
print("\n".join(stdout_report))

def _process_documents(
self, documents, processors, langs, ask_before_saving=True, excluded_ids=None
self,
documents,
lang,
ask_before_saving,
report_header,
thread_url,
clean_bbcode=False,
):

for document in documents:
excluded_document_ids = [
996571, # article with all automatic corrections
]

processors = get_automatic_replacments(self, clean_bbcode=clean_bbcode)

report = defaultdict(int)

for document in documents:
if "redirects_to" in document:
pass # document id is not available...
continue # document id is not available...

document_url = document.get_url()
report[f"Inspected"] += 1

elif excluded_ids is not None and document.document_id in excluded_ids:
if document.document_id in excluded_document_ids:
pass

elif document.get("protected", False) and not self.moderator:
print("{} is a protected".format(document.get_url()))
logging.info(f"{document_url} is protected")
report["Skipped because protected"] += 1

elif document.is_personal() and not self.moderator:
print("{} is a personal".format(document.get_url()))
logging.info(f"{document_url} is a personal document")
report["Skipped because is not CC-BY-SA"] += 1

elif not document.is_valid():
print(
"{} : {}".format(
document.get_url(), document.get_invalidity_reason()
)
)
reason = document.get_invalidity_reason()
logging.info(f"{document_url} : {reason}")
report[f"Skipped because {reason}"] += 1

else:
messages = []
must_save = False

for processor in processors:
if processor.ready_for_production:
if processor(document, langs):
if processor(document, [lang,]):
messages.append(processor.comment)
must_save = True

if must_save:
comment = ", ".join(messages)
try:
document.save(comment, ask_before_saving=ask_before_saving)
except Exception as e:
print(
"Error while saving {} :\n{}".format(document.get_url(), e)
new_document = document.save(
comment, ask_before_saving=ask_before_saving
)
except Exception as e:
report["Unexpcted error"] += 1
logging.error(f"Error while saving {document_url} :\n{e}")
else:
if new_document is None:
report["Skipped by bot owner"] += 1
else:
report["Corrected"] += 1

log_report = "\n".join(
[f"* `{bucket}`: {count}" for bucket, count in report.items()]
)
self.forum.post_message(f"### {report_header}\n\n{log_report}", thread_url)

return report

def export(self, url, filename=None):
"""
@@ -719,6 +748,10 @@ def write(**kwargs):
def get_modified_documents(
self, lang, oldest_date=None, newest_date=None, excluded_users=()
):
logging.info(
f"Get modified documents from {oldest_date} to {newest_date} in lang:{lang}"
)

result = OrderedDict()
for contrib in self.wiki.get_contributions(
oldest_date=oldest_date, newest_date=newest_date
@@ -738,15 +771,11 @@ def get_modified_documents(

return result

def clean_recent_changes(self, days, lang, ask_before_saving):
def clean_recent_changes(self, days, lang, ask_before_saving, thread_url):
newest_date = utils.today().replace(hour=0, minute=0, second=0, microsecond=0)
oldest_date = newest_date - timedelta(days=days)

excluded_ids = [
996571,
]

processors = get_automatic_replacments(self)
report_header = f"Clean recent change from `{oldest_date}` to `{newest_date}`"

def get_documents():

Expand All @@ -761,12 +790,11 @@ def get_documents():
document_id, document_type=document_type
)

if document_id not in excluded_ids:
yield document
yield document

print("Fix recent changes")
self._process_documents(get_documents(), processors, [lang,], ask_before_saving)
print("Fix recent changes finished")
self._process_documents(
get_documents(), lang, ask_before_saving, report_header, thread_url
)

def get_new_contributors(self, contrib_threshold=20, outings_threshold=15):
with open("contributors.txt", "r") as f:
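Illustration (not from the commit): the reporting pattern that _process_documents now uses in core.py — a defaultdict(int) counts each outcome, and the counters are flattened into one bullet per bucket before being posted to the forum thread. A standalone sketch, with the forum posting replaced by a print and a made-up sequence of outcomes:

from collections import defaultdict

report = defaultdict(int)

# Each processed document increments one or more buckets, mirroring _process_documents.
for outcome in ["Inspected", "Corrected", "Inspected", "Skipped because protected", "Inspected"]:
    report[outcome] += 1

report_header = "Clean recent change from `2022-02-17 00:00:00` to `2022-02-18 00:00:00`"
log_report = "\n".join(f"* `{bucket}`: {count}" for bucket, count in report.items())

# In core.py this message goes to self.forum.post_message(message, thread_url);
# here it is simply printed.
print(f"### {report_header}\n\n{log_report}")
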
3 changes: 2 additions & 1 deletion campbot/objects.py
@@ -25,6 +25,7 @@
from __future__ import print_function, unicode_literals, division

import re
import logging
from .differ import get_diff_report


@@ -289,7 +290,7 @@ def save(self, message, ask_before_saving=True):
if _input("Save {} : {}, y/[n] ?\n".format(self.get_url(), message)) != "y":
return None
else:
print("Saving {} : {}".format(self.get_url(), message))
logging.info(f"Saving {self.get_url()} : {message}")

return self._campbot.wiki.put(
"/{}/{}".format(self.url_path, self.document_id),
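Side note on the print-to-logging.info switch above: messages from save() are now only visible if the calling script configures the logging module. A minimal setup, shown here as an assumption (it is not something this commit adds), with a hypothetical URL and comment:

import logging

# Route INFO-level messages (e.g. "Saving <url> : <comment>") to the console with timestamps.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

logging.info("Saving https://www.camptocamp.org/routes/123456 : example comment")
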
4 changes: 2 additions & 2 deletions docs/CLI/clean.rst
@@ -9,13 +9,13 @@ Command line

.. code-block:: bash
campbot clean <url> <langs> --login=<login> --password=<password> [--delay=<seconds>] [--bbcode]
campbot clean <url> <lang> --login=<login> --password=<password> [--delay=<seconds>] [--bbcode]
Options and arguments
---------------------

* ``<url>`` is like https://www.camptocamp.org/routes#w=940468 : all routes associated to waypoint 940468 will be cleaned. Shorthand ``routes#w=940468`` is accepted.
* ``<langs>`` is a comma-saprated list of langs, like fr,de. Clean procedure will impacts only this langs.
* ``<lang>`` is a lang identifier, like fr or de. Clean procedure will impacts only this lang.

Clean processors
----------------
8 changes: 4 additions & 4 deletions docs/CLI/index.rst
@@ -8,9 +8,9 @@ Here isthe help you get by typing ``campbot`` in you command line tool :
CampBot, Python bot framework for camptocamp.org
Usage:
campbot clean_rc <days> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch]
campbot report_rc <days> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch]
campbot clean <url_or_file> <langs> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch] [--bbcode]
campbot clean_rc <days> <lang> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch]
campbot report_rc <days> <lang> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch]
campbot clean <url_or_file> <lang> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch] [--bbcode]
campbot contribs [--out=<filename>] [--starts=<start_date>] [--ends=<end_date>] [--delay=<seconds>]
campbot export <url> [--out=<filename>] [--delay=<seconds>]
@@ -33,7 +33,7 @@ Here isthe help you get by typing ``campbot`` in you command line tool :
filename is also accepted, and must be like :
123 | r
456 | w
<langs> is comma-separated lang identifiers, like fr,de for french and german.
<lang> is a lang identifiers, like fr for french.
contribs Export all contribution in a CSV file. <start_date> and <end_date> are like 2018-05-12
export Export all documents in a CSV file.
<url> is like https://www.camptocamp.org/outings#u=2, or, simplier, outings#u=2
