Add asynchronous PDF downloads, fix the January 1st issue, and make minor fixes/improvements

This commit is contained in:
2025-10-11 11:37:44 +02:00
parent ced5247136
commit 55ccbdd42a
2 changed files with 205 additions and 122 deletions

69
mail.py
View File

@@ -1,34 +1,49 @@
import logging
import smtplib import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
logger = logging.getLogger("ovh_factures.fetcher")
def _as_header(value) -> str:
# Convertit en chaîne pour les en-têtes MIME; join pour listes/tuples.
if isinstance(value, (list, tuple, set)):
return ", ".join(map(str, value))
return str(value)
def _as_rcpt_list(value):
# Liste de destinataires pour SMTP
if value is None:
return []
if isinstance(value, (list, tuple, set)):
return [str(v) for v in value]
return [str(value)]
def construct_html(bills: list[tuple[str, str]]) -> str:
    """Build the HTML body of the "new invoice(s)" notification e-mail.

    Args:
        bills: ``(bill_id, date)`` pairs, one per invoice to list.

    Returns:
        The complete HTML document, or ``""`` if rendering fails (the
        failure is logged with its traceback).
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    try:
        # Escape interpolated values so an unexpected character in an id or
        # a date cannot break the markup.
        rows = [
            f"<li><b style='color:#2c3e50;'>Facture n°{escape(str(b))}</b> — "
            f"<span style='color:#16a085;'>émise le {escape(str(d))}</span></li>"
            for b, d in bills
        ]
        items = "".join(rows)
        return f"""<!DOCTYPE html>
<html>
<head><meta charset="utf-8"><title>Nouvelle(s) facture(s)</title></head>
<body style="font-family:Arial, sans-serif;background:#f9f9f9;color:#333;">
  <div style="max-width:600px;margin:auto;padding:20px;background:#fff;border:1px solid #ddd;border-radius:8px;">
    <h2 style="color:#e74c3c;text-align:center;">
      Vous avez reçu <b>{len(bills)}</b> nouvelle(s) facture(s)
    </h2>
    <ul style="line-height:1.6;font-size:14px;">{items}</ul>
  </div>
</body>
</html>"""
    except Exception:
        # logger.exception already records the active exception; passing it
        # as an extra positional argument without a placeholder makes the
        # logging call itself fail to format.
        logger.exception("Erreur dans construct_html")
        return ""
def send_email( def send_email(
@@ -39,14 +54,22 @@ def send_email(
smtp_mail_address, smtp_mail_address,
smpt_port, smpt_port,
email_to, email_to,
on_error=None,
): ):
try:
msg = MIMEMultipart() msg = MIMEMultipart()
msg["From"] = email_from msg["From"] = _as_header(email_from)
msg["To"] = email_to msg["To"] = _as_header(email_to)
msg["Subject"] = subject msg["Subject"] = _as_header(subject)
msg.attach(MIMEText(content, "html")) msg.attach(MIMEText(str(content), "html"))
rcpts = _as_rcpt_list(email_to)
with smtplib.SMTP(smtp_mail_address, smpt_port) as server: with smtplib.SMTP(smtp_mail_address, smpt_port) as server:
server.starttls() server.starttls()
server.login(email_from, email_password) server.login(str(email_from), str(email_password))
server.sendmail(email_from, email_to, msg.as_string()) server.sendmail(str(email_from), rcpts, msg.as_string())
except Exception as e:
logger.exception("Erreur dans send_email")
if on_error:
on_error(e)

200
main.py
View File

@@ -1,4 +1,6 @@
import os import os
import argparse
import concurrent.futures
import mail as ml import mail as ml
from datetime import date, datetime from datetime import date, datetime
import dotenv import dotenv
@@ -6,18 +8,22 @@ import ovh
import fetcher as ft import fetcher as ft
from urllib.request import urlretrieve from urllib.request import urlretrieve
import logging import logging
from logging.handlers import RotatingFileHandler from logging.handlers import TimedRotatingFileHandler
import traceback import traceback
import sqlite3 import sqlite3
import time as tm
def init():
global logger
# --- Configuration du logging --- # --- Configuration du logging ---
logging.addLevelName(logging.DEBUG, "DÉBOGAGE") logging.addLevelName(logging.DEBUG, "DÉBOGAGE")
logging.addLevelName(logging.INFO, "INFO") logging.addLevelName(logging.INFO, "INFO")
logging.addLevelName(logging.WARNING, "AVERTISSEMENT") logging.addLevelName(logging.WARNING, "AVERTISSEMENT")
logging.addLevelName(logging.ERROR, "ERREUR") logging.addLevelName(logging.ERROR, "ERREUR")
logging.addLevelName(logging.CRITICAL, "CRITIQUE")
logger = logging.getLogger("ovh_factures") os.makedirs(PATH_LOG, exist_ok=True)
logger = logging.getLogger(os.path.join(PATH_LOG, "ovh"))
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
formatter = logging.Formatter( formatter = logging.Formatter(
fmt="%(asctime)s | %(levelname)s | %(message)s", fmt="%(asctime)s | %(levelname)s | %(message)s",
@@ -28,26 +34,17 @@ ch = logging.StreamHandler()
ch.setFormatter(formatter) ch.setFormatter(formatter)
logger.addHandler(ch) logger.addHandler(ch)
# Fichier # Fichier
fh = RotatingFileHandler(
"ovh_factures.log", maxBytes=5_000_000, backupCount=3, encoding="utf-8" fh = TimedRotatingFileHandler(
os.path.join(PATH_LOG, "ovh.log"),
when="M",
interval=1,
backupCount=12,
encoding="utf-8",
) )
fh.setFormatter(formatter) fh.setFormatter(formatter)
logger.addHandler(fh) logger.addHandler(fh)
# Chargement des variables d'environnement (.env)
dotenv.load_dotenv()
APP_KEY = os.environ["APP_KEY"]
APP_SECRET = os.environ["APP_SECRET"]
CONSUMER_KEY = os.environ["CONSUMER_KEY"]
PATH_OVH = os.environ["OVH_PATH"]
DB_PATH = os.environ["DB_PATH"]
EMAIL = os.environ["EMAIL"]
EMAIL_PASSWORD = os.environ["EMAIL_PASSWORD"]
SMTP_MAIL_ADDRESS = os.environ["SMTP_MAIL_ADDRESS"]
SMTP_PORT = os.environ["SMTP_PORT"]
EMAIL_TO = os.environ["EMAIL_TO"]
YEAR = datetime.now().year # Année courante (int)
def get_conn(): def get_conn():
""" """
@@ -62,6 +59,12 @@ def get_conn():
bill_id TEXT PRIMARY KEY, bill_id TEXT PRIMARY KEY,
bill_year INT bill_year INT
)""") )""")
conn.execute("""
CREATE TABLE IF NOT EXISTS dj_bill (
bill_id TEXT PRIMARY KEY,
bill_year INT
)""")
conn.commit() conn.commit()
logger.info("Base SQLite initialisée et table 'bills' disponible") logger.info("Base SQLite initialisée et table 'bills' disponible")
return conn return conn
@@ -85,76 +88,83 @@ def send_error_mail(error_msg):
pass pass
def add_entries_to_db(entries: list[tuple[str, int]], conn, table: str):
    """Batch-insert ``(bill_id, bill_year)`` pairs into ``table``.

    Rows whose ``bill_id`` already exists are left untouched
    (``ON CONFLICT(bill_id) DO NOTHING``).

    Args:
        entries: Pairs to insert.
        conn: Connection exposing ``executemany()`` and ``commit()``
            (presumably the sqlite3 connection from ``get_conn()`` —
            TODO confirm).
        table: Target table name; must be listed in ``_ALLOWED_TABLES``.

    Raises:
        ValueError: If ``table`` is not an allowed table.
        Exception: Any error from the insert or commit is logged, reported
            through ``send_error_mail``, then re-raised.
    """
    # The table name is interpolated into the SQL text (identifiers cannot be
    # bound as parameters), so restrict it to the known tables, as
    # get_entries_from_db does.
    if table not in _ALLOWED_TABLES:
        raise ValueError(f"Table inconnue: {table}")
    try:
        logger.debug("Insertion batch dans '%s': %d entrées", table, len(entries))
        query = f"""
            INSERT INTO {table} (bill_id, bill_year)
            VALUES (?, ?)
            ON CONFLICT(bill_id) DO NOTHING
        """
        conn.executemany(query, entries)
        conn.commit()
        logger.info("Insertion batch dans '%s' validée", table)
    except Exception as e:
        logger.exception("Échec d'insertion batch dans '%s': %s", table, e)
        send_error_mail(traceback.format_exc())
        raise
def get_entries_from_db(conn, table: str) -> set[str]:
    """Return every ``bill_id`` stored in ``table`` as a set.

    Args:
        conn: Connection exposing ``execute()``.
        table: Table to read; must be listed in ``_ALLOWED_TABLES``.

    Raises:
        ValueError: If ``table`` is not an allowed table.
        Exception: Any read error is logged, reported through
            ``send_error_mail``, then re-raised.
    """
    if table not in _ALLOWED_TABLES:
        raise ValueError(f"Table inconnue: {table}")
    try:
        logger.debug("Sélection des bill_id depuis '%s'", table)
        fetched = conn.execute(f"SELECT bill_id FROM {table}").fetchall()
        logger.info("Sélection terminée: %d bill_id récupérés", len(fetched))
        bill_ids = set()
        for record in fetched:
            # Each row holds a single column: the bill_id.
            bill_ids.add(record[0])
        return bill_ids
    except Exception as e:
        logger.exception("Échec de lecture des bill_id depuis '%s': %s", table, e)
        send_error_mail(traceback.format_exc())
        raise
def compare_db_to_data(db_data: set[str], data: list[str]) -> list[str]:
    """Return the items of ``data`` that are absent from ``db_data``.

    The order of ``data`` is preserved, and duplicates are kept.
    """
    absent: list[str] = []
    for candidate in data:
        if candidate in db_data:
            continue
        absent.append(candidate)
    return absent
def indexer(ids: list[str]) -> list[str]: def indexer(ids: list[str]) -> list[str]:
""" """
Parcourt le répertoire de l'année courante, filtre les factures déjà présentes localement, conserve les factures absentes datées de l'année courante, et enregistre en base celles qui appartiennent à une autre année. Parcourt le répertoire de l'année courante, filtre les factures déjà présentes localement,
conserve les factures absentes datées de l'année courante, et enregistre en base celles
qui appartiennent à une autre année. Gère explicitement les cas 31/12 (YEAR-1) et 01/01 (YEAR).
""" """
conn = get_conn() conn = get_conn()
logger.info("Indexation des factures pour l'année %s", YEAR) logger.info("Indexation des factures pour l'année %s", YEAR)
target_dir = os.path.join(PATH_OVH, str(YEAR))
target_dir = os.path.join(PATH_OVH, str(YEAR))
try: try:
ids_already_in = os.listdir(target_dir) ids_already_in = {fn for fn in os.listdir(target_dir) if fn.endswith(".pdf")}
except FileNotFoundError: except FileNotFoundError:
logger.warning("Dossier %s inexistant, aucune facture locale", target_dir) logger.warning("Dossier %s inexistant, aucune facture locale", target_dir)
ids_already_in = [] ids_already_in = set()
missing = compare_db_to_data( expected_missing = [x for x in ids if f"{x}.pdf" not in ids_already_in]
get_entries_from_db(conn), [x for x in ids if f"{x}.pdf" not in ids_already_in] missing = compare_db_to_data(get_entries_from_db(conn, "bills"), expected_missing)
)
logger.info("%d factures absentes détectées", len(missing)) logger.info("%d factures absentes détectées", len(missing))
result: list[str] = [] result: list[str] = []
not_valid_year: list[tuple[str, int]] = list() not_valid_year: list[tuple[str, int]] = []
now = datetime.now()
boundary_run = (now.month, now.day) in {(12, 31), (1, 1)}
bills_downloaded_dj = set()
if boundary_run:
try:
bills_downloaded_dj = set(get_entries_from_db(conn, "dj_bill"))
except Exception:
bills_downloaded_dj = set()
dj_bills: list[tuple[str, date]] = []
for bill_id in missing: for bill_id in missing:
try: try:
meta = ft.fetch_invoice_content( meta = ft.fetch_invoice_content(
@@ -165,17 +175,42 @@ def indexer(ids: list[str]) -> list[str]:
) )
except Exception as e: except Exception as e:
logger.error("Impossible de récupérer le json pour %s : %s", bill_id, e) logger.error("Impossible de récupérer le json pour %s : %s", bill_id, e)
send_error_mail(traceback.format_exc()) send_error_mail(traceback.format_exc())
continue continue
bill_year = datetime.fromisoformat(meta["date"]).year
if bill_year == YEAR: # todo 1 januray case try:
bill_dt = datetime.fromisoformat(meta["date"]).date()
except Exception:
logger.error("Date invalide pour %s: %r", bill_id, meta.get("date"))
continue
if bill_dt.year == YEAR:
result.append(bill_id) result.append(bill_id)
else: else:
not_valid_year.append((bill_id, bill_year)) not_valid_year.append((bill_id, bill_dt.year))
if boundary_run:
is_dec31_prev = bill_dt == date(YEAR - 1, 12, 31)
is_jan1_curr = bill_dt == date(YEAR, 1, 1)
if (is_dec31_prev or is_jan1_curr) and bill_id not in bills_downloaded_dj:
dj_bills.append((bill_id, bill_dt))
if not_valid_year:
add_entries_to_db(not_valid_year, conn, "bills")
logger.info(
"Ajout de %d entrées hors année %s dans 'bills'", len(not_valid_year), YEAR
)
if dj_bills:
try:
add_entries_to_db(dj_bills, conn, "dj_bill")
logger.info(
"Ajout de %d factures de bascule (31/12, 01/01) dans 'dj_bill'",
len(dj_bills),
)
except Exception as e:
logger.error("Échec insertion 'dj_bill': %s", e)
add_entries_to_db(not_valid_year, conn)
logger.info(f"Ajouter {len(not_valid_year)} entrées a la base de donnée")
logger.info("%d factures retenues pour téléchargement", len(result)) logger.info("%d factures retenues pour téléchargement", len(result))
return result return result
@@ -224,8 +259,7 @@ def save_pdf(bill: dict) -> None:
Télécharge le PDF dune facture dans un sous-dossier par année. Télécharge le PDF dune facture dans un sous-dossier par année.
Noms de fichiers : <billId>.pdf Noms de fichiers : <billId>.pdf
""" """
year_dir = os.path.join(PATH_OVH, str(datetime.now().year))
year_dir = os.path.join(PATH_OVH, str(date.year))
os.makedirs(year_dir, exist_ok=True) os.makedirs(year_dir, exist_ok=True)
dest = os.path.join(year_dir, f"{bill['billId']}.pdf") dest = os.path.join(year_dir, f"{bill['billId']}.pdf")
@@ -240,6 +274,27 @@ def save_pdf(bill: dict) -> None:
if __name__ == "__main__": if __name__ == "__main__":
# Chargement des variables d'environnement (.env)
parser = argparse.ArgumentParser()
parser.add_argument("-e", "--env", required=True, help="Path of .env file")
args = parser.parse_args()
dotenv.load_dotenv(args.env)
APP_KEY = os.environ["APP_KEY"]
APP_SECRET = os.environ["APP_SECRET"]
CONSUMER_KEY = os.environ["CONSUMER_KEY"]
PATH_OVH = os.environ["OVH_PATH"]
PATH_LOG = os.environ["LOG_PATH"]
DB_PATH = os.environ["DB_PATH"]
EMAIL = os.environ["EMAIL"]
EMAIL_PASSWORD = os.environ["EMAIL_PASSWORD"]
SMTP_MAIL_ADDRESS = os.environ["SMTP_MAIL_ADDRESS"]
SMTP_PORT = os.environ["SMTP_PORT"]
EMAIL_TO = os.environ["EMAIL_TO"].strip().split(",")
YEAR = datetime.now().year # Année courante (int)
_ALLOWED_TABLES = {"bills", "dj_bill"}
init()
start = tm.time()
logger.info("Démarrage du traitement des factures OVH pour %s", YEAR) logger.info("Démarrage du traitement des factures OVH pour %s", YEAR)
os.makedirs(os.path.join(PATH_OVH, str(YEAR)), exist_ok=True) os.makedirs(os.path.join(PATH_OVH, str(YEAR)), exist_ok=True)
@@ -248,18 +303,21 @@ if __name__ == "__main__":
bills_str = [] bills_str = []
for bill_id in ids_candidats: for bill_id in ids_candidats:
bills_json.append((bill_id, get_bill(bill_id))) bills_json.append((bill_id, get_bill(bill_id)))
# pdf enregistrement.
if len(bills_json) > 0:
for bill_json in bills_json:
save_pdf(bill_json[1])
date = datetime.fromisoformat(bill_json[1]["date"]).date()
bills_str.append( # pdf enregistrement.
(
bill_json[0], if bills_json:
f"{date}", with concurrent.futures.ThreadPoolExecutor() as ex:
) futures = []
) for b in bills_json:
futures.append(ex.submit(save_pdf, b[1]))
# tm.sleep(0.1)
for f in futures:
f.result(timeout=10)
for bill_id, bill_payload in bills_json:
d = datetime.fromisoformat(bill_payload["date"]).date()
bills_str.append((bill_id, f"{d}"))
content = ml.construct_html(bills_str) content = ml.construct_html(bills_str)
ml.send_email( ml.send_email(
"Reçu de facture(s)", "Reçu de facture(s)",
@@ -271,3 +329,5 @@ if __name__ == "__main__":
email_to=EMAIL_TO, email_to=EMAIL_TO,
) )
logger.info("Traitement terminé : %d factures téléchargées", len(ids_candidats)) logger.info("Traitement terminé : %d factures téléchargées", len(ids_candidats))
end = tm.time()
logger.info(f"Runned for {round(end - start, 2)}secs")