Add asynchronous PDF downloads, fix the 1st of January issue, and apply minor fixes/improvements

This commit is contained in:
2025-10-11 11:37:44 +02:00
parent ced5247136
commit 55ccbdd42a
2 changed files with 205 additions and 122 deletions

93
mail.py
View File

@@ -1,34 +1,49 @@
import logging
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
logger = logging.getLogger("ovh_factures.fetcher")
def _as_header(value) -> str:
# Convertit en chaîne pour les en-têtes MIME; join pour listes/tuples.
if isinstance(value, (list, tuple, set)):
return ", ".join(map(str, value))
return str(value)
def _as_rcpt_list(value):
# Liste de destinataires pour SMTP
if value is None:
return []
if isinstance(value, (list, tuple, set)):
return [str(v) for v in value]
return [str(value)]
def construct_html(bills: list[tuple[str, str]]) -> str:
    """Build the HTML body of the "new invoices" notification e-mail.

    Args:
        bills: ``(bill_id, date)`` pairs, one per invoice to list.

    Returns:
        A complete HTML document listing every invoice, or ``""`` when the
        document could not be built (the error is logged).
    """
    # Local import so the function carries its own dependency.
    from html import escape

    try:
        # Values are escaped before interpolation so that unexpected
        # characters in an id or date cannot break the markup.
        rows = [
            f"<li><b style='color:#2c3e50;'>Facture n°{escape(str(b))}</b> — "
            f"<span style='color:#16a085;'>émise le {escape(str(d))}</span></li>"
            for b, d in bills
        ]
        return f"""<!DOCTYPE html>
<html>
<head><meta charset="utf-8"><title>Nouvelle(s) facture(s)</title></head>
<body style="font-family:Arial, sans-serif;background:#f9f9f9;color:#333;">
<div style="max-width:600px;margin:auto;padding:20px;background:#fff;border:1px solid #ddd;border-radius:8px;">
<h2 style="color:#e74c3c;text-align:center;">
Vous avez reçu <b>{len(bills)}</b> nouvelle(s) facture(s)
</h2>
<ul style="line-height:1.6;font-size:14px;">{"".join(rows)}</ul>
</div>
</body>
</html>"""
    except Exception:
        # logger.exception records the traceback itself; passing the
        # exception as an extra argument without a placeholder would make
        # the logging call fail to format the message.
        logger.exception("Erreur dans construct_html")
        return ""
def send_email(
@@ -39,14 +54,22 @@ def send_email(
smtp_mail_address,
smpt_port,
email_to,
on_error=None,
):
msg = MIMEMultipart()
msg["From"] = email_from
msg["To"] = email_to
msg["Subject"] = subject
msg.attach(MIMEText(content, "html"))
try:
msg = MIMEMultipart()
msg["From"] = _as_header(email_from)
msg["To"] = _as_header(email_to)
msg["Subject"] = _as_header(subject)
msg.attach(MIMEText(str(content), "html"))
with smtplib.SMTP(smtp_mail_address, smpt_port) as server:
server.starttls()
server.login(email_from, email_password)
server.sendmail(email_from, email_to, msg.as_string())
rcpts = _as_rcpt_list(email_to)
with smtplib.SMTP(smtp_mail_address, smpt_port) as server:
server.starttls()
server.login(str(email_from), str(email_password))
server.sendmail(str(email_from), rcpts, msg.as_string())
except Exception as e:
logger.exception("Erreur dans send_email")
if on_error:
on_error(e)

234
main.py
View File

@@ -1,4 +1,6 @@
import os
import argparse
import concurrent.futures
import mail as ml
from datetime import date, datetime
import dotenv
@@ -6,47 +8,42 @@ import ovh
import fetcher as ft
from urllib.request import urlretrieve
import logging
from logging.handlers import RotatingFileHandler
from logging.handlers import TimedRotatingFileHandler
import traceback
import sqlite3
import time as tm
# --- Logging configuration ---
# Module-level setup of the "ovh_factures" logger: French level names, one
# console handler and one size-rotated file handler sharing the same formatter.
# NOTE(review): in this flattened commit listing these look like the
# pre-commit lines that init() supersedes — confirm against the real file.
logging.addLevelName(logging.DEBUG, "DÉBOGAGE")
logging.addLevelName(logging.INFO, "INFO")
logging.addLevelName(logging.WARNING, "AVERTISSEMENT")
logging.addLevelName(logging.ERROR, "ERREUR")
logging.addLevelName(logging.CRITICAL, "CRITIQUE")
logger = logging.getLogger("ovh_factures")
logger.setLevel(logging.INFO)
formatter = logging.Formatter(
fmt="%(asctime)s | %(levelname)s | %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
# Console
ch = logging.StreamHandler()
ch.setFormatter(formatter)
logger.addHandler(ch)
# File
fh = RotatingFileHandler(
"ovh_factures.log", maxBytes=5_000_000, backupCount=3, encoding="utf-8"
)
fh.setFormatter(formatter)
logger.addHandler(fh)
# Configure the global `logger`: register French level names, create the
# PATH_LOG directory, then attach a console handler and a time-rotated file
# handler (PATH_LOG/ovh.log) that share one formatter.
# NOTE(review): this listing is a flattened commit diff — indentation is lost
# and the environment-loading statements in the middle (dotenv.load_dotenv()
# through YEAR) appear to be removed module-level lines shown inline rather
# than part of init(); confirm against the real file.
def init():
global logger
# --- Logging configuration ---
logging.addLevelName(logging.DEBUG, "DÉBOGAGE")
logging.addLevelName(logging.INFO, "INFO")
logging.addLevelName(logging.WARNING, "AVERTISSEMENT")
logging.addLevelName(logging.ERROR, "ERREUR")
# Load environment variables (.env)
dotenv.load_dotenv()
APP_KEY = os.environ["APP_KEY"]
APP_SECRET = os.environ["APP_SECRET"]
CONSUMER_KEY = os.environ["CONSUMER_KEY"]
PATH_OVH = os.environ["OVH_PATH"]
DB_PATH = os.environ["DB_PATH"]
EMAIL = os.environ["EMAIL"]
EMAIL_PASSWORD = os.environ["EMAIL_PASSWORD"]
SMTP_MAIL_ADDRESS = os.environ["SMTP_MAIL_ADDRESS"]
SMTP_PORT = os.environ["SMTP_PORT"]
EMAIL_TO = os.environ["EMAIL_TO"]
YEAR = datetime.now().year # Current year (int)
os.makedirs(PATH_LOG, exist_ok=True)
# NOTE(review): the logger is named after a filesystem path, while mail.py
# obtains "ovh_factures.fetcher"; logger hierarchy follows dotted names, so
# records from that logger would not reach the handlers attached here — confirm.
logger = logging.getLogger(os.path.join(PATH_LOG, "ovh"))
logger.setLevel(logging.INFO)
formatter = logging.Formatter(
fmt="%(asctime)s | %(levelname)s | %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
# Console
ch = logging.StreamHandler()
ch.setFormatter(formatter)
logger.addHandler(ch)
# File
# NOTE(review): for TimedRotatingFileHandler, when="M" means minutes, so with
# interval=1 and backupCount=12 only about twelve minutes of rotated logs are
# kept — confirm this is intended (a monthly rotation may have been meant).
fh = TimedRotatingFileHandler(
os.path.join(PATH_LOG, "ovh.log"),
when="M",
interval=1,
backupCount=12,
encoding="utf-8",
)
fh.setFormatter(formatter)
logger.addHandler(fh)
def get_conn():
@@ -62,6 +59,12 @@ def get_conn():
bill_id TEXT PRIMARY KEY,
bill_year INT
)""")
conn.execute("""
CREATE TABLE IF NOT EXISTS dj_bill (
bill_id TEXT PRIMARY KEY,
bill_year INT
)""")
conn.commit()
logger.info("Base SQLite initialisée et table 'bills' disponible")
return conn
@@ -85,76 +88,83 @@ def send_error_mail(error_msg):
pass
def add_entries_to_db(entries: list[tuple[str, int]], conn, table: str = "bills"):
    """Insert ``(bill_id, bill_year)`` pairs into *table* in one batch.

    Rows whose ``bill_id`` already exists are left untouched
    (``ON CONFLICT(bill_id) DO NOTHING``). The transaction is committed on
    success.

    Args:
        entries: ``(bill_id, bill_year)`` pairs to insert.
        conn: Database connection used for the insert and the commit.
        table: Target table; must be listed in ``_ALLOWED_TABLES``.
            Defaults to ``"bills"`` so two-argument callers keep working.

    Raises:
        ValueError: If *table* is not an allowed table name.
        Exception: Any failure during the insert is logged, reported through
            ``send_error_mail`` and re-raised.
    """
    # The table name is interpolated into the SQL text (identifiers cannot be
    # bound as parameters), so only known names are accepted — same guard as
    # get_entries_from_db.
    if table not in _ALLOWED_TABLES:
        raise ValueError(f"Table inconnue: {table}")
    try:
        logger.debug("Insertion batch dans '%s': %d entrées", table, len(entries))
        query = f"""
            INSERT INTO {table} (bill_id, bill_year)
            VALUES (?, ?)
            ON CONFLICT(bill_id) DO NOTHING
        """
        conn.executemany(query, entries)
        conn.commit()
        logger.info("Insertion batch dans '%s' validée", table)
    except Exception as e:
        logger.exception("Échec d'insertion batch dans '%s': %s", table, e)
        send_error_mail(traceback.format_exc())
        raise
def get_entries_from_db(conn, table: str = "bills") -> set[str]:
    """Return every ``bill_id`` stored in *table* as a set of strings.

    Args:
        conn: Database connection to query.
        table: Table to read; must be listed in ``_ALLOWED_TABLES``.
            Defaults to ``"bills"`` so one-argument callers keep working.

    Raises:
        ValueError: If *table* is not an allowed table name.
        Exception: Any failure during the query is logged, reported through
            ``send_error_mail`` and re-raised.
    """
    # The table name ends up in the SQL text, so it is checked against the
    # allow-list before being interpolated.
    if table not in _ALLOWED_TABLES:
        raise ValueError(f"Table inconnue: {table}")
    try:
        logger.debug("Sélection des bill_id depuis '%s'", table)
        rows = conn.execute(f"SELECT bill_id FROM {table}").fetchall()
        logger.info("Sélection terminée: %d bill_id récupérés", len(rows))
        return {row[0] for row in rows}
    except Exception as e:
        logger.exception("Échec de lecture des bill_id depuis '%s': %s", table, e)
        send_error_mail(traceback.format_exc())
        raise
def compare_db_to_data(db_data: set[str], data: list[str]) -> list[str]:
    """Return the identifiers of *data* that are absent from *db_data*.

    The input order of *data* is preserved, and duplicates in *data* are
    kept as-is.
    """
    return [bill_id for bill_id in data if bill_id not in db_data]
def indexer(ids: list[str]) -> list[str]:
"""
Parcourt le répertoire de l'année courante, filtre les factures déjà présentes localement, conserve les factures absentes datées de l'année courante, et enregistre en base celles qui appartiennent à une autre année.
Parcourt le répertoire de l'année courante, filtre les factures déjà présentes localement,
conserve les factures absentes datées de l'année courante, et enregistre en base celles
qui appartiennent à une autre année. Gère explicitement les cas 31/12 (YEAR-1) et 01/01 (YEAR).
"""
conn = get_conn()
logger.info("Indexation des factures pour l'année %s", YEAR)
target_dir = os.path.join(PATH_OVH, str(YEAR))
target_dir = os.path.join(PATH_OVH, str(YEAR))
try:
ids_already_in = os.listdir(target_dir)
ids_already_in = {fn for fn in os.listdir(target_dir) if fn.endswith(".pdf")}
except FileNotFoundError:
logger.warning("Dossier %s inexistant, aucune facture locale", target_dir)
ids_already_in = []
ids_already_in = set()
missing = compare_db_to_data(
get_entries_from_db(conn), [x for x in ids if f"{x}.pdf" not in ids_already_in]
)
expected_missing = [x for x in ids if f"{x}.pdf" not in ids_already_in]
missing = compare_db_to_data(get_entries_from_db(conn, "bills"), expected_missing)
logger.info("%d factures absentes détectées", len(missing))
result: list[str] = []
not_valid_year: list[tuple[str, int]] = list()
not_valid_year: list[tuple[str, int]] = []
now = datetime.now()
boundary_run = (now.month, now.day) in {(12, 31), (1, 1)}
bills_downloaded_dj = set()
if boundary_run:
try:
bills_downloaded_dj = set(get_entries_from_db(conn, "dj_bill"))
except Exception:
bills_downloaded_dj = set()
dj_bills: list[tuple[str, date]] = []
for bill_id in missing:
try:
meta = ft.fetch_invoice_content(
@@ -165,17 +175,42 @@ def indexer(ids: list[str]) -> list[str]:
)
except Exception as e:
logger.error("Impossible de récupérer le json pour %s : %s", bill_id, e)
send_error_mail(traceback.format_exc())
continue
bill_year = datetime.fromisoformat(meta["date"]).year
if bill_year == YEAR: # todo 1 januray case
try:
bill_dt = datetime.fromisoformat(meta["date"]).date()
except Exception:
logger.error("Date invalide pour %s: %r", bill_id, meta.get("date"))
continue
if bill_dt.year == YEAR:
result.append(bill_id)
else:
not_valid_year.append((bill_id, bill_year))
not_valid_year.append((bill_id, bill_dt.year))
if boundary_run:
is_dec31_prev = bill_dt == date(YEAR - 1, 12, 31)
is_jan1_curr = bill_dt == date(YEAR, 1, 1)
if (is_dec31_prev or is_jan1_curr) and bill_id not in bills_downloaded_dj:
dj_bills.append((bill_id, bill_dt))
if not_valid_year:
add_entries_to_db(not_valid_year, conn, "bills")
logger.info(
"Ajout de %d entrées hors année %s dans 'bills'", len(not_valid_year), YEAR
)
if dj_bills:
try:
add_entries_to_db(dj_bills, conn, "dj_bill")
logger.info(
"Ajout de %d factures de bascule (31/12, 01/01) dans 'dj_bill'",
len(dj_bills),
)
except Exception as e:
logger.error("Échec insertion 'dj_bill': %s", e)
add_entries_to_db(not_valid_year, conn)
logger.info(f"Ajouter {len(not_valid_year)} entrées a la base de donnée")
logger.info("%d factures retenues pour téléchargement", len(result))
return result
@@ -224,8 +259,7 @@ def save_pdf(bill: dict) -> None:
Télécharge le PDF dune facture dans un sous-dossier par année.
Noms de fichiers : <billId>.pdf
"""
year_dir = os.path.join(PATH_OVH, str(date.year))
year_dir = os.path.join(PATH_OVH, str(datetime.now().year))
os.makedirs(year_dir, exist_ok=True)
dest = os.path.join(year_dir, f"{bill['billId']}.pdf")
@@ -240,6 +274,27 @@ def save_pdf(bill: dict) -> None:
if __name__ == "__main__":
# Chargement des variables d'environnement (.env)
parser = argparse.ArgumentParser()
parser.add_argument("-e", "--env", required=True, help="Path of .env file")
args = parser.parse_args()
dotenv.load_dotenv(args.env)
APP_KEY = os.environ["APP_KEY"]
APP_SECRET = os.environ["APP_SECRET"]
CONSUMER_KEY = os.environ["CONSUMER_KEY"]
PATH_OVH = os.environ["OVH_PATH"]
PATH_LOG = os.environ["LOG_PATH"]
DB_PATH = os.environ["DB_PATH"]
EMAIL = os.environ["EMAIL"]
EMAIL_PASSWORD = os.environ["EMAIL_PASSWORD"]
SMTP_MAIL_ADDRESS = os.environ["SMTP_MAIL_ADDRESS"]
SMTP_PORT = os.environ["SMTP_PORT"]
EMAIL_TO = os.environ["EMAIL_TO"].strip().split(",")
YEAR = datetime.now().year # Année courante (int)
_ALLOWED_TABLES = {"bills", "dj_bill"}
init()
start = tm.time()
logger.info("Démarrage du traitement des factures OVH pour %s", YEAR)
os.makedirs(os.path.join(PATH_OVH, str(YEAR)), exist_ok=True)
@@ -248,18 +303,21 @@ if __name__ == "__main__":
bills_str = []
for bill_id in ids_candidats:
bills_json.append((bill_id, get_bill(bill_id)))
# pdf enregistrement.
if len(bills_json) > 0:
for bill_json in bills_json:
save_pdf(bill_json[1])
date = datetime.fromisoformat(bill_json[1]["date"]).date()
bills_str.append(
(
bill_json[0],
f"{date}",
)
)
# pdf enregistrement.
if bills_json:
with concurrent.futures.ThreadPoolExecutor() as ex:
futures = []
for b in bills_json:
futures.append(ex.submit(save_pdf, b[1]))
# tm.sleep(0.1)
for f in futures:
f.result(timeout=10)
for bill_id, bill_payload in bills_json:
d = datetime.fromisoformat(bill_payload["date"]).date()
bills_str.append((bill_id, f"{d}"))
content = ml.construct_html(bills_str)
ml.send_email(
"Reçu de facture(s)",
@@ -271,3 +329,5 @@ if __name__ == "__main__":
email_to=EMAIL_TO,
)
logger.info("Traitement terminé : %d factures téléchargées", len(ids_candidats))
end = tm.time()
logger.info(f"Runned for {round(end - start, 2)}secs")