add sqlite3 db to avoid useless downloads

This commit is contained in:
2025-09-05 17:53:48 +02:00
parent 1a0acb242c
commit 0181b7d855
2 changed files with 479 additions and 9 deletions

86
main.py
View File

@@ -6,6 +6,7 @@ import fetcher as ft
from urllib.request import urlretrieve
import logging
from logging.handlers import RotatingFileHandler
import sqlite3
# --- Configuration du logging ---
logging.addLevelName(logging.DEBUG, "DÉBOGAGE")
@@ -37,15 +38,83 @@ APP_KEY = os.environ["APP_KEY"]
APP_SECRET = os.environ["APP_SECRET"]
CONSUMER_KEY = os.environ["CONSUMER_KEY"]
PATH_OVH = os.environ["OVH_PATH"]
DB_PATH = os.environ["DB_PATH"]
YEAR = datetime.now().year # Année courante (int)
def get_conn():
    """Open the SQLite database at DB_PATH and ensure the 'bills' table exists.

    The table is created on first use with two columns: ``bill_id``
    (TEXT, primary key) and ``bill_year`` (INT).

    Returns:
        An open ``sqlite3`` connection. The caller owns it and is
        responsible for closing it.

    Raises:
        Exception: any error raised while connecting or creating the table
            is logged and re-raised unchanged.
    """
    conn = None
    try:
        logger.debug("Ouverture de la connexion SQLite vers %s", DB_PATH)
        conn = sqlite3.connect(DB_PATH)
        logger.debug("Connexion établie, vérification/creation de la table 'bills'")
        conn.execute("""
            CREATE TABLE IF NOT EXISTS bills (
                bill_id TEXT PRIMARY KEY,
                bill_year INT
            )""")
        conn.commit()
        logger.info("Base SQLite initialisée et table 'bills' disponible")
        return conn
    except Exception as e:
        logger.exception("Erreur lors de l'initialisation de la base SQLite: %s", e)
        # Do not leak a half-initialised connection: if connect() succeeded
        # but the schema setup failed, release the handle before propagating.
        if conn is not None:
            conn.close()
        raise
def add_entries_to_db(entries: list[tuple[str, int]], conn):
    """Insert (bill_id, bill_year) pairs into the 'bills' table in one batch.

    Rows whose ``bill_id`` already exists are silently skipped
    (``ON CONFLICT(bill_id) DO NOTHING``), so the call can be repeated with
    overlapping data.

    Args:
        entries: pairs of (bill_id, bill_year) to store.
        conn: open SQLite connection on which the insert and commit run.

    Raises:
        Exception: any error raised by the insert or the commit is logged
            and re-raised, after the pending transaction has been rolled back.
    """
    try:
        logger.debug("Insertion batch dans 'bills': %d entrées", len(entries))
        conn.executemany(
            """
            INSERT INTO bills (bill_id, bill_year)
            VALUES (?, ?)
            ON CONFLICT(bill_id) DO NOTHING
            """,
            entries,
        )
        conn.commit()
        logger.info("Insertion batch dans 'bills' validée")
    except Exception as e:
        logger.exception("Échec d'insertion batch dans 'bills': %s", e)
        # Undo any partially applied batch so the caller's connection is not
        # left inside an open transaction. A failing rollback is only logged
        # so that the original error is the one that propagates.
        try:
            conn.rollback()
        except Exception:
            logger.exception("Échec du rollback après l'erreur d'insertion dans 'bills'")
        raise
def get_entries_from_db(conn) -> set[str]:
    """Return every ``bill_id`` stored in the 'bills' table.

    Args:
        conn: open SQLite connection used to run the SELECT.

    Returns:
        The set of bill identifiers (first column of each fetched row).

    Raises:
        Exception: any error raised while querying is logged and re-raised.
    """
    try:
        logger.debug("Sélection des bill_id depuis 'bills'")
        fetched = conn.execute("SELECT bill_id FROM bills").fetchall()
        # The count logged is the number of rows fetched, not the set size.
        logger.info("Sélection terminée: %d bill_id récupérés", len(fetched))
        known_ids = {record[0] for record in fetched}
        return known_ids
    except Exception as err:
        logger.exception("Échec de lecture des bill_id depuis 'bills': %s", err)
        raise
def compare_db_to_data(db_data: set[str], data: list[str]) -> list[str]:
    """Return the identifiers from ``data`` that are absent from ``db_data``.

    Args:
        db_data: identifiers already known (membership is tested against it).
        data: candidate identifiers to check.

    Returns:
        A new list with the elements of ``data`` not found in ``db_data``,
        in their original order; duplicates in ``data`` are kept.
    """
    return [bill_id for bill_id in data if bill_id not in db_data]
def indexer(ids: list[str]) -> list[str]:
"""
Parcourt le répertoire de l'année courante et compare les factures déjà présentes
avec la liste d'IDs renvoyée par OVH. Ne conserve que les factures absentes
ET datées de l'année courante.
Parcourt le répertoire de l'année courante, filtre les factures déjà présentes localement, conserve les factures absentes datées de l'année courante, et enregistre en base celles qui appartiennent à une autre année.
"""
conn = get_conn()
logger.info("Indexation des factures pour l'année %s", YEAR)
target_dir = f"{PATH_OVH}{YEAR}"
try:
@@ -54,10 +123,13 @@ def indexer(ids: list[str]) -> list[str]:
logger.warning("Dossier %s inexistant, aucune facture locale", target_dir)
ids_already_in = []
missing = [x for x in ids if f"{x}.pdf" not in ids_already_in]
missing = compare_db_to_data(
get_entries_from_db(conn), [x for x in ids if f"{x}.pdf" not in ids_already_in]
)
logger.info("%d factures absentes détectées", len(missing))
result: list[str] = []
not_valid_year: list[tuple[str, int]] = list()
for bill_id in missing:
try:
meta = ft.fetch_invoice_content(
@@ -67,12 +139,16 @@ def indexer(ids: list[str]) -> list[str]:
consumer_key=CONSUMER_KEY,
)
except Exception as e:
logger.error("Impossible de récupérer la méta pour %s : %s", bill_id, e)
logger.error("Impossible de récupérer le json pour %s : %s", bill_id, e)
continue
bill_year = datetime.fromisoformat(meta["date"]).year
if bill_year == YEAR:
result.append(bill_id)
else:
not_valid_year.append((bill_id, bill_year))
add_entries_to_db(not_valid_year, conn)
logger.info(f"Ajouter {len(not_valid_year)} entrées a la base de donnée")
logger.info("%d factures retenues pour téléchargement", len(result))
return result

View File

@@ -1,9 +1,403 @@
certifi==2025.8.3
charset-normalizer==3.4.3
about-time==4.2.1
aiodns==3.3.0
aiohappyeyeballs==2.4.4
aiohttp==3.10.11
aiosignal==1.4.0
alive-progress==3.3.0
altgraph==0.17.4
amulet-core==1.9.29
amulet-leveldb==1.0.2
amulet-map-editor==0.10.42
amulet-nbt==2.1.5
annotated-types==0.7.0
anvil-parser==0.9.0
anyio==4.9.0
appdirs==1.4.4
argcomplete==3.6.2
argon2-cffi==25.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==3.0.0
async-lru==2.0.5
asyncio-dgram==2.2.0
attrs==25.1.0
autocommand==2.2.2
Automat==24.8.1
autoslot==2022.12.1
babel==2.17.0
backoff==2.2.1
bcc==0.33.0
bcrypt==4.2.1
Beaker==1.12.1
beautifulsoup4==4.13.5
beautifultable==1.1.0
bidict==0.23.1
black==25.1.0
bleach==6.2.0
blinker==1.9.0
blivet==3.12.1
blivet-gui==2.6.0
boilerpy3==1.0.7
Brlapi==0.8.6
Brotli==1.1.0
cattrs==25.1.1
certifi==2022.12.7
cffi==1.17.1
chardet==5.2.0
charset-normalizer==2.0.12
cheroot==10.0.1
CherryPy==18.10.0
chess==1.11.2
click==8.1.8
cloudscraper==1.2.71
colorama==0.4.6
colorlog==6.9.0
comm==0.2.2
constantly==23.10.4
construct==2.5.3
contourpy==1.3.1
crypt_r==3.13.1
cryptography==44.0.0
cson==0.8
cssselect==1.2.0
cupshelpers==1.0
cycler==0.12.1
dasbus==1.7
dbus-fast==2.44.1
dbus-python==1.3.2
dbus_next==0.2.3
debugpy==1.8.14
decorator==5.2.1
defusedxml==0.7.1
deluge==2.2.0
distro==1.9.0
dnf==4.23.0
dnspython==2.7.0
docopt==0.6.2
docstring_parser==0.17.0
email_validator==2.2.0
et_xmlfile==2.0.0
evdev==1.9.1
Events==0.5
executing==2.2.0
fastjsonschema==2.21.1
fedora-third-party==0.10
file-magic==0.4.0
filelock==3.18.0
filetype==1.2.0
Flask==3.1.0
Flask-Bcrypt==1.0.1
Flask-Login==0.6.3
Flask-SocketIO==5.5.1
Flask-SQLAlchemy==3.1.1
Flask-WTF==1.2.2
fonttools==4.55.8
fqdn==1.5.1
fros==1.1
frozendict==2.4.6
frozenlist==1.5.0
fsspec==2025.7.0
future==1.0.0
geographiclib==2.1
GeoIP==1.3.2
geopy==2.4.1
ghunt==2.3.3
git-filter-repo==2.47.0
graphemeu==0.7.2
greenlet==3.1.1
gunicorn==23.0.0
h11==0.16.0
h2==4.3.0
haystack-ai==2.16.1
haystack-experimental==0.12.0
hf-xet==1.1.5
hpack==4.1.0
httpcore==1.0.9
httpx==0.27.2
huggingface-hub==0.34.3
humanize==4.12.0
hyperframe==6.1.0
hyperlink==21.0.0
icmplib==3.0.4
idna==3.10
ImageHash==4.3.2
impacket==0.10.0
importlib_metadata==8.6.1
incremental==24.7.2
inflect==7.5.0
inflection==0.5.1
inkex==1.4.0
instaloader==4.14.2
ipykernel==6.29.5
ipython==9.3.0
ipython_pygments_lexers==1.1.1
iso639==0.1.4
isoduration==20.11.0
itsdangerous==2.2.0
jaraco.classes==3.4.0
jaraco.collections==5.2.1
jaraco.context==6.0.1
jaraco.functools==4.1.0
jaraco.text==4.0.0
jedi==0.19.2
jeepney==0.8.0
Jinja2==3.1.6
jiter==0.10.0
joblib==1.5.1
json5==0.12.0
jsonpickle==3.4.2
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
jupyter-events==0.12.0
jupyter-lsp==2.2.5
jupyter_client==8.6.3
jupyter_core==5.8.1
jupyter_server==2.16.0
jupyter_server_terminals==0.5.3
jupyterlab==4.4.3
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
keyring==25.6.0
kiwisolver==1.4.8
langtable==0.0.69
lazy-imports==0.3.1
ldap3==2.9.1
ldapdomaindump==0.10.0
libcomps==0.1.22
libdnf==0.74.0
libtorrent==2.0.11
libvirt-python==11.0.0
logging==0.4.9.6
louis==3.33.0
lutris==0.5.19
lxml==5.3.2
lz4==4.4.4
Mako==1.2.3
markdown-it-py==4.0.0
MarkupSafe==3.0.2
matplotlib==3.10.0
matplotlib-inline==0.1.7
maxminddb==2.8.2
mcstatus==11.1.1
mdurl==0.1.2
meson==1.7.2
minecraft-resource-pack==1.4.6
minecraft_ping==0.0.4
mistune==3.1.3
moddb==0.12.0
mopidy==4.0.0a4
Mopidy-Iris==3.69.3
more-itertools==10.5.0
mpmath==1.3.0
msgpack==1.1.0
multidict==6.1.0
mutagen==1.47.0
mutf8==1.0.6
mypy_extensions==1.1.0
nbclient==0.10.2
nbconvert==7.16.6
nbformat==5.10.4
NBT==1.5.1
nest-asyncio==1.6.0
netaddr==1.3.0
networkx==3.4.2
nftables==0.1
notebook==7.4.3
notebook_shim==0.2.4
num2words==0.5.14
numpy==1.26.4
oauthlib==3.3.1
olefile==0.47
openai==1.98.0
openpyxl==3.1.5
outcome==1.3.0.post0
overrides==7.7.0
ovh==1.2.0
python-dotenv==1.1.1
packaging==24.2
pandas==2.3.1
pandocfilters==1.5.1
parso==0.8.4
Paste==3.10.1
pathspec==0.12.1
pefile==2024.8.26
perf==0.1
pexpect==4.9.0
phonenumbers==9.0.13
pid==2.2.3
pillow==10.4.0
pipx==1.7.1
platformdirs==3.11.0
ply==3.11
portalocker==2.10.1
portend==3.2.1
posthog==6.3.1
productmd==1.45
prometheus_client==0.22.1
prompt_toolkit==3.0.51
prompthub-py==4.0.0
protobuf==5.29.5
proton-core==0.6.0
proton-keyring-linux==0.2.0
proton-vpn-api-core==0.45.6
proton-vpn-daemon==0.12.0
proton-vpn-gtk-app==4.10.0b0
proton-vpn-lib==0.1.1
proton-vpn-network-manager==0.12.15
protonvpn_cli==2.2.12
psutil==7.0.0
ptyprocess==0.7.0
publicsuffixlist==1.0.2.20250830
pure_eval==0.2.3
pwquality==1.4.5
pyasn1==0.4.8
pyasn1_modules==0.4.1
PyAudio==0.2.13
pycairo==1.25.1
pycares==4.10.0
pycparser==2.20
pycrypto==2.6.1
pycryptodomex==3.23.0
pycups==2.0.4
pydantic==1.10.22
pydantic_core==2.33.2
pyenchant==3.2.2
pygame==2.6.1
Pygments==2.19.0
PyGObject==3.50.0
pyinotify==0.9.6
pyinstaller==6.11.1
pyinstaller-hooks-contrib==2025.1
pykickstart==3.62
pykka==4.2.0
PyMCTranslate==1.2.33
pymunk==6.11.1
PyMuPDF==1.26.4
PyNaCl==1.5.0
pynvim==0.5.2
PyOpenGL==3.1.9
pyOpenSSL==25.0.0
pyparsing==3.2.1
pyparted==3.13.0
pypresence==4.3.0
PyQt5==5.15.11
PyQt5_sip==12.16.1
PyRoxy @ git+https://github.com/MatrixTM/PyRoxy.git@ea0f88dbc0573292ba5672124f69e4e7a31b544d
pyserial==3.5
PySocks==1.7.1
python-augeas==1.2.0
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-engineio==4.11.2
python-gnupg==0.5.4
python-json-logger==3.3.0
python-linux-procfs==0.7.3
python-meh==0.52
python-pam==2.0.2
python-pptx==1.0.2
python-ptrace==0.9.9
python-socketio==5.12.1
pythondialog==3.5.3
pytz==2025.2
pyudev==0.24.3
PyWavelets==1.9.0
pyxdg==0.27
PyYAML==6.0.2
pyynl @ file:///builddir/build/BUILD/kernel-6.16.3-build/kernel-6.16.3/linux-6.16.3-200.fc42.x86_64/tools/net/ynl
pyzmq==27.0.0
quantulum3==0.9.2
rank-bm25==0.2.2
RapidFuzz==3.11.0
referencing==0.36.2
regex==2024.11.6
rencode==1.0.6
reportlab==4.4.3
requests==2.32.5
requests-cache==0.9.8
requests-file==2.0.0
requests-ftp==0.3.1
requests-futures==1.0.2
requests-oauthlib==2.0.0
urllib3==2.5.0
requests-toolbelt==1.0.0
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rich==13.9.4
rich-argparse==1.7.1
rpds-py==0.25.0
rpm==4.20.1
safetensors==0.5.3
scikit-learn==1.7.1
scipy==1.16.0
scour==0.38.2
SecretStorage==3.3.3
secure==1.0.1
selenium==4.34.0
selinux @ file:///builddir/build/BUILD/libselinux-3.8-build/libselinux-3.8/src
Send2Trash==1.8.3
sentry-sdk==2.21.0
sepolicy @ file:///builddir/build/BUILD/policycoreutils-3.8-build/selinux-3.8/python/sepolicy
service-identity==24.2.0
setools==4.5.1
setuptools==80.9.0
sherlock==0.4.1
sherlock-project==0.15.0
shtab==1.7.1
simple-websocket==1.1.0
simpleaudio==1.0.4
simpleline==1.9.0
six==1.17.0
Slowloris==0.2.6
sniffio==1.3.1
sortedcontainers==2.4.0
sos==4.10.0
soupsieve==2.7
speg==0.3
SQLAlchemy==2.0.38
sseclient-py==1.8.0
stack-data==0.6.3
stem==1.8.2
sympy==1.13.3
systemd-python==235
tempora==5.8.1
tenacity==9.1.2
terminado==0.18.1
threadpoolctl==3.6.0
tiktoken==0.9.0
tinycss2==1.4.0
tokenizers==0.21.4
torch==2.7.1+cpu
torchaudio==2.7.1+cpu
torchvision==0.22.1+cpu
tornado==6.4.1
tqdm==4.67.1
traitlets==5.14.3
transformers==4.54.1
trio==0.30.0
trio-websocket==0.12.2
Twisted==24.11.0
typeguard==4.4.4
types-python-dateutil==2.9.0.20250516
typing-inspection==0.4.1
typing_extensions==4.14.1
tzdata==2025.2
uri-template==1.3.0
url-normalize==2.2.1
urllib3==1.26.20
userpath==1.9.2
wcwidth==0.2.13
webcolors==24.11.1
webencodings==0.5.1
websocket-client==1.8.0
websockets==14.2
Werkzeug==3.1.3
wheel==0.45.1
wsproto==1.2.0
WTForms==3.2.1
wxPython==4.2.3
xkbregistry==0.3
XlsxWriter==3.2.3
yarl==1.13.1
yt-dlp==2025.8.27
zc.lockfile==3.0.post1
zipp==3.21.0
zope.interface==7.2