Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ dependencies = [
"shtab",
"websockets>=10.1",
"babel",
"PyYAML",
"importlib_resources"
]

[project.scripts]
Expand Down
7 changes: 2 additions & 5 deletions pytr/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,15 @@
import ssl
import requests
import websockets

from ecdsa import NIST256p, SigningKey
from ecdsa.util import sigencode_der
from http.cookiejar import MozillaCookieJar

from pytr.utils import get_logger
from pytr.app_path import *
Comment thread
Katzmann1983 marked this conversation as resolved.
Outdated


# Per-user storage locations; the same names and values are defined in
# pytr/app_path.py, which is star-imported above.
home = pathlib.Path.home()
# Directory below the user's home directory that contains the files listed below.
BASE_DIR = home / '.pytr'
CREDENTIALS_FILE = BASE_DIR / 'credentials'
KEY_FILE = BASE_DIR / 'keyfile.pem'
COOKIES_FILE = BASE_DIR / 'cookies.txt'


class TradeRepublicApi:
Expand Down
10 changes: 10 additions & 0 deletions pytr/app_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import pathlib
Comment thread
Katzmann1983 marked this conversation as resolved.

# Home directory of the current user.
home = pathlib.Path.home()
# Directory below the home directory that contains all files defined in this module.
BASE_DIR = home / '.pytr'

# Files located directly inside BASE_DIR.
# NOTE(review): presumably login credentials, the signing key and the session
# cookies used by pytr.api - confirm against the code that reads them.
CREDENTIALS_FILE = BASE_DIR / 'credentials'
KEY_FILE = BASE_DIR / 'keyfile.pem'
COOKIES_FILE = BASE_DIR / 'cookies.txt'

# YAML file that FileDestinationProvider loads its destination rules from.
DESTINATION_CONFIG_FILE = BASE_DIR / 'file_destination_config.yaml'
Empty file added pytr/config/__init__.py
Empty file.
387 changes: 387 additions & 0 deletions pytr/config/file_destination_config__template.yaml

Large diffs are not rendered by default.

88 changes: 26 additions & 62 deletions pytr/dl.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
import re
import os

from concurrent.futures import as_completed
from pathlib import Path
from requests_futures.sessions import FuturesSession
from requests import session
from datetime import datetime

from pathvalidate import sanitize_filepath

from pytr.utils import preview, get_logger
from pytr.api import TradeRepublicError
from pytr.timeline import Timeline
from pytr.file_destination_provider import FileDestinationProvider

class DL:
def __init__(
self,
tr,
output_path,
filename_fmt,
file_destination_provider:FileDestinationProvider,
Comment thread
Katzmann1983 marked this conversation as resolved.
Outdated
since_timestamp=0,
history_file='pytr_history',
max_workers=8,
Expand All @@ -25,13 +27,13 @@ def __init__(
'''
tr: api object
output_path: name of the directory where the downloaded files are saved
filename_fmt: format string to customize the file names
file_destination_provider: The destination provider for the file path and file names based on the event type and other parameters.
since_timestamp: downloaded files since this date (unix timestamp)
'''
self.tr = tr
self.output_path = Path(output_path)
self.history_file = self.output_path / history_file
self.filename_fmt = filename_fmt
self.file_destination_provider = file_destination_provider
self.since_timestamp = since_timestamp
self.universal_filepath = universal_filepath

Expand Down Expand Up @@ -83,75 +85,37 @@ async def dl_loop(self):
else:
self.log.warning(f"unmatched subscription of type '{subscription['type']}':\n{preview(response)}")

def dl_doc(self, doc, titleText, subtitleText, subfolder=None):
def dl_doc(self, doc, event_type: str, event_title: str, event_subtitle: str, section_title: str, timestamp: datetime):
'''
send asynchronous request, append future with filepath to self.futures
'''
doc_url = doc['action']['payload']
if subtitleText is None:
subtitleText = ''

try:
date = doc['detail']
iso_date = '-'.join(date.split('.')[::-1])
except KeyError:
date = ''
iso_date = ''
doc_id = doc['id']

# extract time from subtitleText
try:
time = re.findall('um (\\d+:\\d+) Uhr', subtitleText)
if time == []:
time = ''
else:
time = f' {time[0]}'
except TypeError:
time = ''

if subfolder is not None:
directory = self.output_path / subfolder
else:
directory = self.output_path

# If doc_type is something like 'Kosteninformation 2', then strip the 2 and save it in doc_type_num
doc_type = doc['title'].rsplit(' ')
if doc_type[-1].isnumeric() is True:
doc_type_num = f' {doc_type.pop()}'
else:
doc_type_num = ''

doc_type = ' '.join(doc_type)
titleText = titleText.replace('\n', '').replace('/', '-')
subtitleText = subtitleText.replace('\n', '').replace('/', '-')

filename = self.filename_fmt.format(
iso_date=iso_date, time=time, title=titleText, subtitle=subtitleText, doc_num=doc_type_num, id=doc_id
)

filename_with_doc_id = filename + f' ({doc_id})'

if doc_type in ['Kontoauszug', 'Depotauszug']:
filepath = directory / 'Abschlüsse' / f'{filename}' / f'{doc_type}.pdf'
filepath_with_doc_id = directory / 'Abschlüsse' / f'{filename_with_doc_id}' / f'{doc_type}.pdf'
else:
filepath = directory / doc_type / f'{filename}.pdf'
filepath_with_doc_id = directory / doc_type / f'{filename_with_doc_id}.pdf'
document_title = doc.get('title', '')


variables = {}
variables['iso_date'] = timestamp.strftime('%Y-%m-%d')
variables['iso_date_year'] = timestamp.strftime('%Y')
variables['iso_date_month'] = timestamp.strftime('%m')
variables['iso_date_day'] = timestamp.strftime('%d')
variables['iso_time'] = timestamp.strftime('%H-%M')

filepath = self.file_destination_provider.get_file_path(event_type, event_title, event_subtitle, section_title, document_title, variables)
if filepath.endswith('.pdf') is False:
filepath = f'{filepath}.pdf'

filepath = Path(os.path.join( self.output_path , filepath))

if self.universal_filepath:
filepath = sanitize_filepath(filepath, '_', 'universal')
filepath_with_doc_id = sanitize_filepath(filepath_with_doc_id, '_', 'universal')
else:
filepath = sanitize_filepath(filepath, '_', 'auto')
filepath_with_doc_id = sanitize_filepath(filepath_with_doc_id, '_', 'auto')

Comment thread
Katzmann1983 marked this conversation as resolved.
Outdated

if filepath in self.filepaths:
self.log.debug(f'File {filepath} already in queue. Append document id {doc_id}...')
if filepath_with_doc_id in self.filepaths:
self.log.debug(f'File {filepath_with_doc_id} already in queue. Skipping...')
return
else:
filepath = filepath_with_doc_id
self.log.debug(f'File {filepath} already in queue. Skipping...')
return

doc['local filepath'] = str(filepath)
self.filepaths.append(filepath)

Expand Down
185 changes: 185 additions & 0 deletions pytr/file_destination_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
import os
import re
import shutil
import pytr.config

from importlib_resources import files
from yaml import safe_load
from pathlib import Path
from pytr.app_path import *
from pytr.utils import get_logger

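# TODO: decide whether to use LibYAML (CLoader/CDumper). It is faster than the pure-Python implementation but is an additional, optional dependency. Loader and Dumper are not referenced below; the config is read with safe_load.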
try:
from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
from yaml import Loader, Dumper


# Key of the destination entry whose file name is used when a matched
# destination does not define its own file name.
ALL_CONFIG = "all"
# Key of the destination entry used when no destination, or more than one
# destination, matches a document.
UNKNOWN_CONFIG = "unknown"

# File name of the default config template inside the pytr.config package.
TEMPLATE_FILE_NAME = "file_destination_config__template.yaml"


class DefaultFormateValue(dict):
    '''
    Mapping for str.format_map that keeps unknown placeholders intact.

    Looking up a key that is not present returns the key wrapped in curly
    braces, so "{unknown}" in a format string is rendered as "{unknown}"
    instead of raising KeyError.
    '''

    def __missing__(self, key):
        # Re-create the placeholder text the format string contained.
        return "{" + key + "}"


class DestinationConfig:
    '''
    A single named entry of the "destination" section of the config file.

    config_name: key of the entry in the config file
    filename: file name format string of the entry (may be None)
    path: directory format string of the entry (may be None)
    pattern: list of Pattern objects that select the documents of this entry (may be None)
    '''

    def __init__(
        self,
        config_name: str,
        filename: str,
        path: str = None,
        pattern: list = None,
    ):
        # Identification of the entry.
        self.config_name = config_name
        # Where matching documents are stored.
        self.path = path
        self.filename = filename
        # Which documents the entry applies to.
        self.pattern = pattern


class Pattern:
    '''
    One match rule of a destination config.

    Every attribute holds the expression for the document property of the
    same name, or None when that property is not restricted by this rule.
    Note the parameter order: event_subtitle comes before event_title.
    '''

    def __init__(
        self,
        event_type: str,
        event_subtitle: str,
        event_title: str,
        section_title: str,
        document_title: str,
    ):
        # Properties of the timeline event.
        self.event_type = event_type
        self.event_title = event_title
        self.event_subtitle = event_subtitle
        # Properties of the document inside the event.
        self.section_title = section_title
        self.document_title = document_title


class FileDestinationProvider:
    '''
    A provider for file path and file names based on the event type and other parameters.

    The rules are read from the YAML file at DESTINATION_CONFIG_FILE. Its
    "destination" mapping must contain the entries "all" (default file name)
    and "unknown" (fallback path and file name); every other entry describes
    one destination with a path, an optional file name and a list of patterns.
    '''

    def __init__(self):
        '''
        Load and validate the destination config.

        If the config file does not exist yet, it is created from the template
        bundled in the pytr.config package.

        Raises:
            ValueError: if the config file does not have the required structure.
        '''
        self._log = get_logger(__name__)

        config_file_path = Path(DESTINATION_CONFIG_FILE)
        if not config_file_path.is_file():
            self.__create_default_config(config_file_path)

        # Context manager so the handle is closed even when parsing fails.
        with open(config_file_path, "r", encoding="utf8") as config_file:
            destination_config = safe_load(config_file)

        self.__validate_config(destination_config)

        destinations = destination_config["destination"]

        # Both entries are guaranteed to exist by __validate_config.
        self._all_file_config = DestinationConfig(
            ALL_CONFIG, destinations[ALL_CONFIG]["filename"])
        self._unknown_file_config = DestinationConfig(
            UNKNOWN_CONFIG, destinations[UNKNOWN_CONFIG]["filename"], destinations[UNKNOWN_CONFIG]["path"])

        self._destination_configs: list[DestinationConfig] = []
        for config_name, config in destinations.items():
            if config_name in (ALL_CONFIG, UNKNOWN_CONFIG):
                continue
            self._destination_configs.append(DestinationConfig(
                config_name,
                config.get("filename"),
                config.get("path"),
                self.__extract_pattern(config.get("pattern"))))

    def get_file_path(self, event_type: str, event_title: str, event_subtitle: str, section_title: str, document_title: str, variables: dict) -> str:
        '''
        Get the file path based on the event type and other parameters.

        A destination matches when at least one of its patterns accepts all
        given parameters. Parameters that are None are not matched against.
        If no destination or more than one destination matches, the
        'unknown' config is used.

        Parameters:
        event_type (str): The event type
        event_title (str): The event title
        event_subtitle (str): The event subtitle
        section_title (str): The section title
        document_title (str): The document title
        variables (dict): The variables->value dict to be used in the file path and file name format.
            The dict itself is not modified.

        Returns:
        str: The formatted path (directory and file name) of the document.
        '''
        candidates = (
            ("event_type", event_type),
            ("event_title", event_title),
            ("event_subtitle", event_subtitle),
            ("section_title", section_title),
            ("document_title", document_title),
        )
        parameters = {key: value for key, value in candidates if value is not None}

        # Work on a copy so the caller's dict is left untouched; the event
        # parameters take precedence over equally named variables.
        format_variables = {**variables, **parameters}

        matching_configs = [
            config for config in self._destination_configs
            if self.__config_matches(config, parameters)
        ]

        if len(matching_configs) == 1:
            return self.__create_file_path(matching_configs[0], format_variables)

        details = f"event_type:{event_type}, event_title:{event_title},event_subtitle:{event_subtitle},section_title:{section_title},document_title:{document_title}"
        if not matching_configs:
            self._log.debug(
                f"No destination config found for the given parameters: {details}")
        else:
            self._log.debug(
                f"Multiple Destination Patterns where found. Using 'unknown' config! Parameter: {details}")

        return self.__create_file_path(self._unknown_file_config, format_variables)

    def __config_matches(self, config: "DestinationConfig", parameters: dict) -> bool:
        '''
        Check whether one pattern of the config accepts all given parameters.

        All parameters are checked against the same pattern, so values that
        satisfy different patterns of one config do not add up to a match.
        A pattern attribute of None accepts every value; other attributes are
        applied with re.match, i.e. anchored at the start of the value.
        '''
        for pattern in config.pattern or ():
            accepted = True
            for key, value in parameters.items():
                expression = getattr(pattern, key)
                if expression is not None and not re.match(expression, value):
                    accepted = False
                    break
            if accepted:
                return True

        return False

    def __create_file_path(self, config: "DestinationConfig", variables: dict) -> str:
        '''
        Join path and file name of the config and fill in the variables.

        Falls back to the file name of the 'all' config when the config does
        not define one. Placeholders without a value are kept as they are.
        '''
        filename = config.filename
        if filename is None:
            filename = self._all_file_config.filename

        # NOTE(review): variable values are inserted verbatim; a value that
        # contains a path separator adds a directory level - confirm intended.
        return os.path.join(config.path, filename).format_map(DefaultFormateValue(variables))

    def __extract_pattern(self, pattern_config: list) -> list:
        '''
        Convert the pattern entries of a destination into Pattern objects.

        Returns an empty list when the destination defines no patterns.
        '''
        if not pattern_config:
            return []

        return [
            Pattern(pattern.get("event_type"),
                    pattern.get("event_subtitle"),
                    pattern.get("event_title"),
                    pattern.get("section_title"),
                    pattern.get("document_title"))
            for pattern in pattern_config
        ]

    def __validate_config(self, destination_config: dict):
        '''
        Check that the loaded config has the required structure.

        Raises:
            ValueError: if the 'destination' mapping, the 'all' file name, the
                'unknown' path/file name or the path of any other destination
                is missing. This includes an empty config file.
        '''
        if not isinstance(destination_config, dict) or "destination" not in destination_config:
            raise ValueError("'destination' key not found in config file")

        destinations = destination_config["destination"]
        if not isinstance(destinations, dict):
            raise ValueError("'destination' in config file does not contain any destination config")

        # Check if default config is present
        all_config = destinations.get(ALL_CONFIG)
        if not isinstance(all_config, dict) or "filename" not in all_config:
            raise ValueError(
                "'all' config not found or filename not present in default config")

        unknown_config = destinations.get(UNKNOWN_CONFIG)
        if not isinstance(unknown_config, dict) or "filename" not in unknown_config or "path" not in unknown_config:
            raise ValueError(
                "'unknown' config not found or filename/path not present in unknown config")

        for config_name, config in destinations.items():
            if config_name == ALL_CONFIG:
                continue
            if not isinstance(config, dict) or "path" not in config:
                raise ValueError(
                    f"'{config_name}' has no path defined in destination config")

    def __create_default_config(self, config_file_path: Path):
        '''
        Copy the bundled config template to config_file_path.

        The target directory is created first, because it may not exist yet.
        '''
        from importlib_resources import as_file

        config_file_path.parent.mkdir(parents=True, exist_ok=True)

        # as_file provides a real file system path even when the package
        # resources are not stored as plain files.
        template = files(pytr.config).joinpath(TEMPLATE_FILE_NAME)
        with as_file(template) as template_path:
            shutil.copyfile(template_path, config_file_path)
Loading