bitdruid
diff --git a/‎README.md
+57-43 b/‎README.md
+57-43
diff --git a/‎pyproject.toml
+2-1 b/‎pyproject.toml
+2-1
diff --git a/‎pywaybackup/Arguments.py
+5-1 b/‎pywaybackup/Arguments.py
+5-1
diff --git a/‎pywaybackup/Exception.py
+16-19 b/‎pywaybackup/Exception.py
+16-19
diff --git a/‎pywaybackup/SnapshotCollection.py
+16-5 b/‎pywaybackup/SnapshotCollection.py
+16-5
diff --git a/‎pywaybackup/Verbosity.py
+7-5 b/‎pywaybackup/Verbosity.py
+7-5
@@ -3,7 +3,6 @@
 [![PyPI](https://img.shields.io/pypi/v/pywaybackup)](https://pypi.org/project/pywaybackup/)
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/pywaybackup)](https://pypi.org/project/pywaybackup/)
 ![Python Version](https://img.shields.io/badge/Python-3.8-blue)
-![Python_Sqlite3 Version](https://img.shields.io/badge/Python_Sqlite3-3.25-blue)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 
 Downloading archived web pages from the [Wayback Machine](https://archive.org/web/).
@@ -29,12 +28,15 @@ This tool allows you to download content from the Wayback Machine (archive.org).
    ```pip install .```
    - in a virtual env or use `--break-system-package`
 
-## Usage infos - important notes
+## Important notes
 
 - Linux recommended: On Windows machines, the path length is limited. This can only be overcome by editing the registry. Files that exceed the path length will not be downloaded.
 - If you query an explicit file (e.g. a query-string `?query=this` or `login.html`), the `--explicit`-argument is recommended as a wildcard query may lead to an empty result.
 - The tool uses a sqlite database to handle snapshots. The database will only persist while the download is running.
 
+<br>
+<br>
+
 ## Arguments
 
 - `-h`, `--help`: Show the help message and exit.
@@ -55,7 +57,7 @@ This tool allows you to download content from the Wayback Machine (archive.org).
 - **`-s`**, **`--save`**:<br>
   Save a page to the Wayback Machine. (beta)
 
-### Optional query parameters
+#### Optional query parameters
 
 - **`-e`**, **`--explicit`**:<br>
   Only download the explicit given URL. No wildcard subdomains or paths. Use e.g. to get root-only snapshots. This is recommended for explicit files like `login.html` or `?query=this`.
@@ -76,7 +78,9 @@ Limits the amount of snapshots to query from the CDX server. If an existing CDX
    - **`--end`**:<br>
      Timestamp to end searching.
 
-### Behavior manipulation
+### Optional
+
+#### Behavior Manipulation
 
 - **`-o`**, **`--output`**:<br>
 Defaults to `waybackup_snapshots` in the current directory. The folder where downloaded files will be saved.
@@ -105,55 +109,64 @@ Specifies delay between download requests in seconds. Default is no delay (0).
 <!-- - **`--convert-links`**:<br>
 If set, all links in the downloaded files will be converted to local links. This is useful for offline browsing. The links are converted to the local path structure. Show output with `--verbosity trace`. -->
 
-### Special:
+#### Job Handling:
 
 - **`--reset`**:  
   If set, the job will be reset, and any existing `cdx`, `db`, `csv` files will be **deleted**. This allows you to start the job from scratch without considering previously downloaded data.
 
 - **`--keep`**:  
   If set, all files will be kept after the job is finished. This includes the `cdx` and `db` file. Without this argument, they will be deleted if the job finished successfully.
 
-# Usage 
+<br>
+<br>
+
+## Usage
 
 ### Handling Interrupted Jobs
-When a job is interrupted (by any reason), `pywaybackup` is designed to resume the job from where it left off. It automatically detects existing job data (based on the URL and <u>**optional query parameters**</u> - including output directory) and resumes the process without requiring manual intervention. Here's how the tool handles different scenarios:
-
-- **Default Behavior:** 
-  - On restarting the same job (same URL, <u>**optional query parameters**</u>, and output directory), the tool will:
-    - Reuse the existing `.cdx` and `.db` files.
-    - Resume downloading snapshots from the last successful point.
-    - Skip previously downloaded files to save time and resources.
-
-- **Manual Reset with `--reset`:** 
-  - This command deletes any existing `.cdx` and `.db` files associated with the job and starts the process from scratch.
-  - Useful if:
-    - The previous data is corrupted.
-    - You want to re-query the snapshots without considering previously downloaded data.
-
-- **Preserving Job Data with `--keep`:** 
-  - Normally, `.cdx` and `.db` files are deleted after the job finishes successfully.
-  - Use `--keep` to retain these files for future use (e.g., re-analysis or extending the query later).
-
-> **Note1:** The resumption process only works if the output directory remains the same as the one used during the initial job.
-> 
-> **Note2:** `--reset` will NOT delete the already downloaded files for now. You have to remove them 'by hand'.
-  
-### Example
 
-1. Start downloading all available snapshots:<br>`waybackup -u https://example.com -a`
-2. Interrupt the process `CTRL+C`<br>
-3. The tool will detect the existing job data and resume downloading from the last completed point:<br>`waybackup -u https://example.com -a`
-> **Important:** `waybackup -u https://example.com -c` -> The tool will NOT resume because a necessary identifier-changed
-4. This deletes any existing .cdx and .db files associated with the job and starts the process from scratch:<br>`waybackup -u https://example.com -a --reset`
-5. This ensures all job-related files are kept for future use, such as re-analysis or extending the query later:<br>`waybackup -u https://example.com -a --keep`
+`pywaybackup` resumes interrupted jobs. The tool automatically continues from where it left off.
+
+- Detects existing `.cdx` and `.db` files in the output directory and resumes downloading from the last successful point.
+- Compares `URL`, `mode`, and `optional query parameters` with the existing job data to decide whether the job can be resumed automatically.
+- Skips previously downloaded files to save time.
+> **Note:** Changing the URL, mode, query parameters, or output directory prevents automatic resumption.
+
+#### Resetting a Job (`--reset`)
+- Deletes `.cdx` and `.db` files and restarts the process from scratch.
+- Does **not** remove already downloaded files.
+- `waybackup -u https://example.com -a --reset`
+
+#### Keeping Job Data (`--keep`)
+- Normally, `.cdx` and `.db` files are deleted after a successful job.
+- `--keep` preserves them for future re-analysis or extending the query.
+- `waybackup -u https://example.com -a --keep`
 
-## Output path structure
+<br>
+<br>
+
+## Examples
+
+1. Download a specific single snapshot of all available files (starting from root):<br>
+`waybackup -u https://example.com -a --start 20210101000000 --end 20210101000000`
+2. Download a specific single snapshot of all available files (starting from a subdirectory):<br>
+`waybackup -u https://example.com/subdir1/subdir2/assets/ -a --start 20210101000000 --end 20210101000000`
+3. Download a specific single snapshot of exactly the given URL (no subdirs):<br>
+`waybackup -u https://example.com -a --start 20210101000000 --end 20210101000000 --explicit`
+4. Download all snapshots of all available files in the given range:<br>
+`waybackup -u https://example.com -a --start 20210101000000 --end 20231122000000`
+
+<br>
+<br>
+
+## Output
+
+### Path Structure
 
 The output path is currently structured as follows by an example for the query:<br>
-`http://example.com/subdir1/subdir2/assets/`:
+`http://example.com/subdir1/subdir2/assets/`
 <br><br>
 For the first and last version (`-f` or `-l`):
-- The requested path will only include all files/folders starting from your query-path.
+- Only includes the files/folders starting from your query path.
 ```
 your/path/waybackup_snapshots/
 └── the_root_of_your_query/ (example.com/)
@@ -165,7 +178,7 @@ your/path/waybackup_snapshots/
                 ...
 ```
 For all versions (`-a`):
-- Will currently create a folder named as the root of your query. Inside this folder, you will find all timestamps and per timestamp the path you requested.
+- Creates a folder named after the root of your query. Inside this folder, you will find one entry per timestamp, each containing the path you requested.
 ```
 your/path/waybackup_snapshots/
 └── the_root_of_your_query/ (example.com/)
@@ -184,7 +197,7 @@ your/path/waybackup_snapshots/
     ...
 ```
 
-## CSV Output
+### CSV
 
 Each snapshot is stored with the following keys/values. These are either stored in a sqlite database while the download is running or saved into a CSV file after the download is finished.
 
@@ -210,11 +223,12 @@ For download queries:
 
 Exceptions will be written into `waybackup_error.log` (each run overwrites the file).
 
-### Known ToDos
-
-- [ ] currently there is no logic to handle if both a http and https version of a page is available
+<br>
+<br>
 
 ## Contributing
 
 I'm always happy for some feature requests to improve the usability of this tool.
 Feel free to give suggestions and report issues. Project is still far from being perfect.
+
+> Please open pull requests from your `dev` branch into this repository's `dev` branch.
@@ -7,7 +7,7 @@ packages = ["pywaybackup"]
 
 [project]
 name = "pywaybackup"
-version = "3.0.2"
+version = "3.1.0"
 description = "Query and download archive.org as simple as possible."
 authors = [
     { name = "bitdruid", email = "[email protected]" }
@@ -16,6 +16,7 @@ license = { file = "LICENSE" }
 readme = "README.md"
 requires-python = ">=3.8"
 dependencies = [
+    "pysqlite3-binary==0.5.4",
     "requests==2.31.0",
     "tqdm==4.66.2",
     "python-magic==0.4.27; sys_platform == 'linux'",
 
@@ -2,9 +2,11 @@
 import sys
 import os
 import argparse
+
 from importlib.metadata import version
 
 from pywaybackup.helper import url_split, sanitize_filename
+from pywaybackup.Exception import Exception as ex
 
 class Arguments:
 
@@ -73,7 +75,7 @@ def init(cls):
 
         if cls.output is None:
             cls.output = os.path.join(os.getcwd(), "waybackup_snapshots")
-        os.makedirs(cls.output, exist_ok=True)
+        os.makedirs(cls.output, exist_ok=True) if not cls.save else None
 
         if cls.log is True:
             cls.log = os.path.join(cls.output, f"waybackup_{sanitize_filename(cls.url)}.log")
@@ -84,6 +86,8 @@ def init(cls):
             cls.mode = "last"
         if cls.first:
             cls.mode = "first"
+        if cls.save:
+            cls.mode = "save"
 
         if cls.filetype:
             cls.filetype = [ft.lower().strip() for ft in cls.filetype.split(",")]
 
@@ -1,34 +1,33 @@
-
 import sys
 import os
-from datetime import datetime
+import re
 import linecache
 import traceback
-
-import re
+from datetime import datetime
 
 from importlib.metadata import version
 
-class Exception:
 
+class Exception:
     new_debug = True
     output = None
     command = None
 
     @classmethod
     def init(cls, output=None, command=None):
-        sys.excepthook = cls.exception_handler # set custom exception handler (uncaught exceptions)
+        sys.excepthook = (
+            cls.exception_handler
+        )  # set custom exception handler (uncaught exceptions)
         cls.output = output
         cls.command = command
 
     @classmethod
     def exception(cls, message: str, e: Exception, tb=None):
         custom_tb = sys.exc_info()[-1] if tb is None else tb
-        original_tb = cls.relativate_path("".join(traceback.format_exception(type(e), e, e.__traceback__)))
-        exception_message = (
-            "-------------------------\n" 
-            f"!-- Exception: {message}\n"
+        original_tb = cls.relativate_path(
+            "".join(traceback.format_exception(type(e), e, e.__traceback__))
         )
+        exception_message = f"-------------------------\n!-- Exception: {message}\n"
         if custom_tb is not None:
             while custom_tb.tb_next:  # loop to last traceback frame
                 custom_tb = custom_tb.tb_next
@@ -46,10 +45,7 @@ def exception(cls, message: str, e: Exception, tb=None):
             )
         else:
             exception_message += "!-- Traceback is None\n"
-        exception_message += (
-            f"!-- Description: {e}\n"
-            "-------------------------"
-        )
+        exception_message += f"!-- Description: {e}\n-------------------------"
         print(exception_message)
         debug_file = os.path.join(cls.output, "waybackup_error.log")
         print(f"Exception log: {debug_file}")
@@ -85,10 +81,10 @@ def relativate_path(cls, input: str) -> str:
             if os.path.isfile(input):  # case single path
                 return os.path.relpath(input, os.getcwd())
             input_modified = ""
-            input_lines = input.split('\n')
-            if len(input_lines) == 1: # case single line
+            input_lines = input.split("\n")
+            if len(input_lines) == 1:  # case single line
                 return input
-            for line in input.split('\n'): # case multiple lines
+            for line in input.split("\n"):  # case multiple lines
                 match = path_pattern.search(line)
                 if match:
                     original_path = match.group(1)
@@ -104,5 +100,6 @@ def exception_handler(exception_type, exception, traceback):
         if issubclass(exception_type, KeyboardInterrupt):
             sys.__excepthook__(exception_type, exception, traceback)
             return
-        Exception.exception("UNCAUGHT EXCEPTION", exception, traceback) # uncaught exceptions also with custom scheme
-    
+        Exception.exception(
+            "UNCAUGHT EXCEPTION", exception, traceback
+        )  # uncaught exceptions also with custom scheme
@@ -1,11 +1,13 @@
-from pywaybackup.Verbosity import Verbosity as vb
-from pywaybackup.helper import url_split
-from pywaybackup.db import Database
-from tqdm import tqdm
 import json
 import csv
 import os
 
+from tqdm import tqdm
+
+from pywaybackup.Verbosity import Verbosity as vb
+from pywaybackup.helper import url_split
+from pywaybackup.db import Database
+
 class SnapshotCollection:
     """
     Represents the interaction with the snapshot-collection contained in the snapshot database.
@@ -292,12 +294,21 @@ def get_snapshot(connection):
         """
         Get a snapshot-row from the snapshot table with response NULL. (not processed)
         """
+        # mark as locked for other workers // only visual because get_snapshot fetches by NULL
         connection.cursor.execute(
             """
-            SELECT rowid, * FROM snapshot_tbl WHERE response IS NULL LIMIT 1
+            UPDATE snapshot_tbl
+            SET response = 'LOCK'
+            WHERE rowid = (
+                SELECT rowid FROM snapshot_tbl 
+                WHERE response IS NULL
+                LIMIT 1
+            )
+            RETURNING rowid, *;
             """
         )
         row = connection.cursor.fetchone()
+        connection.conn.commit()
         return row
 
     @classmethod
 
@@ -1,4 +1,3 @@
-import sys
 from tqdm import tqdm
 
 class Verbosity:
@@ -63,21 +62,24 @@ def progress(cls, progress: int, maxval: int = None):
                 cls.pbar.refresh()
 
     @classmethod
-    def generate_logline(cls, status: str = "", type: str = "", message: str = ""):
+    def generate_logline(cls, status: str, type: str, message: str):
         """
-        STATUS     -> TYPE: MESSAGE
+        STATUS     ➔ TYPE: MESSAGE
         """
 
         if not status and not type:
             return message
 
-        status_length = 11
+        status_length = 10
         type_length = 5
 
         status = status.ljust(status_length)
+        status = f"{status} -> "
+
         type = type.ljust(type_length)
+        type = f"{type}: " if type.strip() else ""
 
-        log_entry = f"{status} -> {type}: {message}"
+        log_entry = f"{status}{type}{message}"
 
         return log_entry