"""
Server components of the LabInform datasafe.

Different server components can be distinguished:

* user-facing components (frontends)
* storage components (backends)

Note that "user" is a broad term here, meaning any person or program
accessing the datasafe. In this respect, the clients contained in
:mod:`datasafe.client` are users as well.

The backend components deal with the actual storage of data (in the file
system) and the access to them.


Frontends
=========

Frontends allow a "user" (mostly another program) to access the datasafe,
without needing any details of how the data are actually stored.

Currently, two frontends are implemented that have different use cases:

* :class:`Server`

  General frontend that can be used locally with
  :class:`datasafe.client.LocalClient`.

* :class:`HTTPServerAPI`

  API for the HTTP server running via flask.

  HTTP frontend that can be used via HTTP, *e.g.* using the
  :class:`datasafe.client.HTTPClient` class. Using HTTP generally allows
  client and server to be completely separated in terms of their
  locations, and data to be accessed even remotely. However, keep in mind
  that remote access comes with security implications that are currently
  not dealt with.

  The actual HTTP server is created with the function
  :func:`create_http_server`, but the API class is the interesting part here.


Backends
========

Backends deal with actually storing the data.

Currently, there is only one backend implemented:

* :class:`StorageBackend`

  A backend using the file system for storing data.


Things to decide
================

Some things that still need to be decided:

* Where to store configuration?

  At least the base directory for the datasafe needs to be defined in some
  way.

  Other configuration values could be the issuer (number after the "42." of
  a LOI).

  Perhaps one could store the configuration in a separate configuration
  class to start with and see how this goes...


Module documentation
====================

"""
import os
import shutil
import tempfile
from flask import Flask, request
from flask.views import MethodView
from datasafe import configuration
import datasafe.loi as loi_
from datasafe.exceptions import (
MissingPathError,
MissingContentError,
MissingLoiError,
InvalidLoiError,
ExistingFileError,
LoiNotFoundError,
NoFileError,
)
from datasafe.manifest import Manifest
from datasafe.utils import change_working_dir
class Server:
    """
    Server part of the datasafe.

    The server interacts with the storage backend to store and retrieve
    contents and provides the user interface.

    It retrieves datasets, stores them and should check,
    whether its content is complete and not compromised.

    The transfer occurs as bytes of the zipped dataset that is received by
    the server, decoded, unzipped, and archived into the correct directory.

    Attributes
    ----------
    storage : :class:`StorageBackend`
        Backend all storage operations are delegated to

    loi : :class:`datasafe.loi.Parser`
        Parser holding the components of the most recently parsed LOI

    """

    def __init__(self):
        self.storage = StorageBackend()
        self.loi = loi_.Parser()
        self._loi_checker = loi_.LoiChecker()

    def new(self, loi=""):
        """
        Create new LOI.

        The storage corresponding to the LOI will be created and the LOI
        returned if successful. This does, however, *not* add any data to
        the datasafe. Therefore, calling :meth:`new` will usually be
        followed by calling :meth:`upload` at some later point. On the other
        hand, before calling :meth:`upload`, you *need to* call :meth:`new`
        to create the new LOI storage space.

        Parameters
        ----------
        loi : :class:`str`
            LOI for which the resource should be created

        Returns
        -------
        loi : :class:`str`
            LOI the resource has been created for

        Raises
        ------
        datasafe.exceptions.MissingLoiError
            Raised if no LOI is provided

        datasafe.exceptions.InvalidLoiError
            Raised if LOI is not valid (for the given operation)

        """
        if not loi:
            raise MissingLoiError("No LOI provided.")
        self._check_loi(loi=loi, validate=False)
        id_parts = self.loi.split_id()
        if id_parts[0] != "exp":
            raise InvalidLoiError("Loi ist not a valid experiment LOI.")
        # The measurement number check is skipped here (the number is only
        # appended below). A dedicated checker instance is used so that this
        # relaxed setting does not stick to the checker shared with
        # upload/download/update of the same server instance.
        checker = loi_.LoiChecker()
        checker.ignore_check = "LoiMeasurementNumberChecker"
        if not checker.check(loi):
            raise InvalidLoiError("String is not a valid LOI.")
        # The storage directory consists of the first three ID parts if the
        # second part is a date, and of the first four parts otherwise.
        n_parts = 3 if loi_.IsDateChecker().check(id_parts[1]) else 4
        path = self.loi.separator.join(id_parts[:n_parts])
        if not self.storage.exists(path):
            self.storage.create(path)
        new_path = self.storage.create_next_id(path)
        root_and_issuer = self.loi.root_issuer_separator.join(
            [self.loi.root, self.loi.issuer]
        )
        return self.loi.separator.join(
            [root_and_issuer, self.loi.type, *new_path.split(os.sep)]
        )

    def upload(self, loi="", content=None):
        """
        Upload data to the datasafe.

        Data are uploaded as bytes of the zipped content (dataset).

        Parameters
        ----------
        loi : :class:`str`
            LOI the data should be uploaded for

        content : :class:`bytes`
            byte representation of a ZIP archive containing the contents to
            be stored via the backend

        Returns
        -------
        integrity : :class:`dict`
            dict with fields ``data`` and ``all`` containing boolean values

            For details see :meth:`datasafe.manifest.Manifest.check_integrity`.

        Raises
        ------
        datasafe.exceptions.MissingLoiError
            Raised if no LOI is provided

        datasafe.exceptions.InvalidLoiError
            Raised if LOI is not a valid datasafe LOI

        datasafe.exceptions.LoiNotFoundError
            Raised if resource corresponding to LOI does not exist

        datasafe.exceptions.ExistingFileError
            Raised if resource corresponding to LOI is not empty

        """
        if not loi:
            raise MissingLoiError("No LOI provided.")
        self._check_loi(loi=loi)
        if not self.storage.exists(self.loi.id):
            raise LoiNotFoundError("LOI does not exist.")
        if not self.storage.isempty(path=self.loi.id):
            raise ExistingFileError("Directory not empty.")
        return self.storage.deposit(path=self.loi.id, content=content)

    def download(self, loi=""):
        """
        Download data from the datasafe.

        Parameters
        ----------
        loi : :class:`str`
            LOI the data should be downloaded for

        Returns
        -------
        content : :class:`bytes`
            byte representation of a ZIP archive containing the contents of
            the directory corresponding to the LOI

        Raises
        ------
        datasafe.exceptions.MissingLoiError
            Raised if no LOI is provided

        datasafe.exceptions.InvalidLoiError
            Raised if LOI is not a valid datasafe LOI

        datasafe.exceptions.LoiNotFoundError
            Raised if resource corresponding to LOI cannot be found

        datasafe.exceptions.MissingContentError
            Raised if resource corresponding to LOI has no content

        """
        if not loi:
            raise MissingLoiError("No LOI provided.")
        self._check_loi(loi=loi)
        if not self.storage.exists(self.loi.id):
            raise LoiNotFoundError("LOI does not exist.")
        if self.storage.isempty(self.loi.id):
            raise MissingContentError("LOI does not have content.")
        return self.storage.retrieve(path=self.loi.id)

    def update(self, loi="", content=None):
        """
        Update data in the datasafe.

        Data are uploaded as bytes of the zipped content (dataset). The
        existing content of the resource is removed and replaced by the new
        content.

        Parameters
        ----------
        loi : :class:`str`
            LOI the resource should be updated for

        content : :class:`bytes`
            byte representation of a ZIP archive containing the contents to
            be updated via the backend

        Returns
        -------
        integrity : :class:`dict`
            dict with fields ``data`` and ``all`` containing boolean values

            For details see :meth:`datasafe.manifest.Manifest.check_integrity`.

        Raises
        ------
        datasafe.exceptions.MissingLoiError
            Raised if no LOI is provided

        datasafe.exceptions.InvalidLoiError
            Raised if LOI is not a valid datasafe LOI

        datasafe.exceptions.LoiNotFoundError
            Raised if resource corresponding to LOI does not exist

        datasafe.exceptions.NoFileError
            Raised if resource corresponding to LOI is empty

        datasafe.exceptions.MissingContentError
            Raised if no content is provided. The existing data are left
            untouched in this case.

        """
        if not loi:
            raise MissingLoiError("No LOI provided.")
        self._check_loi(loi=loi)
        if not self.storage.exists(self.loi.id):
            raise LoiNotFoundError("LOI does not exist.")
        if self.storage.isempty(path=self.loi.id):
            raise NoFileError("Directory empty")
        # Refuse missing content *before* removing anything: otherwise the
        # stored dataset would be deleted and only afterwards the deposit
        # would fail, leaving the resource without its data.
        if not content:
            raise MissingContentError(
                message="No content provided to deposit."
            )
        self.storage.remove(path=self.loi.id, force=True)
        return self.storage.deposit(path=self.loi.id, content=content)

    def _check_loi(self, loi="", validate=True):
        # Parse the LOI (results are available via self.loi afterwards),
        # ensure it is of type "ds" and optionally run the full LOI check.
        self.loi.parse(loi)
        if self.loi.type != "ds":
            raise InvalidLoiError("LOI is not a datasafe LOI.")
        if validate and not self._loi_checker.check(loi):
            raise InvalidLoiError("String is not a valid LOI.")
class StorageBackend:
    """
    File system backend for the datasafe, actually handling directories.

    The storage backend does not care at all about LOIs, but only operates
    on paths within the file system. As far as datasets are concerned,
    the backend requires a manifest file to accompany each dataset. However,
    it does *not* create such file. Furthermore, data are deposited (using
    :meth:`deposit`) and retrieved (using :meth:`retrieve`) as streams
    containing the contents of ZIP archives.

    Attributes
    ----------
    config : :class:`datasafe.configuration.StorageBackend`
        configuration the other attributes are initialised from

    root_directory : :class:`str`
        base directory for the datasafe

    manifest_filename : :class:`str`
        name of manifest file

    """

    def __init__(self):
        self.config = configuration.StorageBackend()
        # Fall back to the default of the Manifest class if not configured.
        self.manifest_filename = (
            self.config.manifest_filename or Manifest().manifest_filename
        )
        self.root_directory = self.config.root_directory or ""

    def working_path(self, path=""):
        """
        Full path to working directory in datasafe

        Parameters
        ----------
        path : :class:`str`
            path relative to the root directory of the datasafe

        Returns
        -------
        working_path : :class:`str`
            full path to work on

        """
        return os.path.join(self.root_directory, path)

    def create(self, path=""):
        """
        Create directory for given path.

        Intermediate directories are created as well.

        Parameters
        ----------
        path : :class:`str`
            path to create directory for

        Raises
        ------
        datasafe.exceptions.MissingPathError
            Raised if no path is provided

        """
        if not path:
            raise MissingPathError
        os.makedirs(self.working_path(path))

    def exists(self, path=""):
        """
        Check whether given path exists

        Parameters
        ----------
        path : :class:`str`
            path to check

        Returns
        -------
        result : :class:`bool`
            Returns true if ``path`` exists below the root directory.

        """
        return os.path.exists(self.working_path(path))

    def isempty(self, path=""):
        """
        Check whether directory corresponding to path is empty

        Parameters
        ----------
        path : :class:`str`
            path to check

        Returns
        -------
        result : :class:`bool`
            Returns true if directory corresponding to ``path`` is empty.

        Raises
        ------
        datasafe.exceptions.NoFileError
            Raised if the directory corresponding to ``path`` does not exist

        """
        directory = self.working_path(path)
        if not os.path.exists(directory):
            raise NoFileError
        return not os.listdir(directory)

    def remove(self, path="", force=False):
        """
        Remove directory corresponding to path.

        Usually, non-empty directories will not be removed but raise an
        :class:`OSError` exception.

        Parameters
        ----------
        path : :class:`str`
            path that should be removed

        force : :class:`bool`
            set to `True` when non-empty directory should be removed

            default: `False`

        Raises
        ------
        OSError
            Raised if a non-empty directory should be removed and
            ``force`` is set to ``False``

        """
        if force:
            shutil.rmtree(self.working_path(path))
        else:
            os.rmdir(self.working_path(path))

    def get_highest_id(self, path=""):
        """
        Get number of subdirectory corresponding to path with highest number

        Directory entries whose names cannot be converted to integers
        (*e.g.*, hidden files created by the operating system) are ignored.

        In case there is no numeric ID yet in the directory, it returns 0.

        Parameters
        ----------
        path : :class:`str`
            path to get subdirectory with highest number for

        Returns
        -------
        id : :class:`int`
            subdirectory with the highest number in the directory
            corresponding to ``path``

        """
        numeric_ids = []
        for name in os.listdir(self.working_path(path)):
            # Important: compare as integers, not as strings ("10" > "9").
            try:
                numeric_ids.append(int(name))
            except ValueError:
                continue
        return max(numeric_ids, default=0)

    def create_next_id(self, path=""):
        """
        Create next subdirectory in directory corresponding to path

        Parameters
        ----------
        path : :class:`str`
            path the subdirectory should be created in

        Returns
        -------
        new_path : :class:`str`
            path of the newly created subdirectory (``path`` plus new ID)

        """
        new_path = os.path.join(path, str(self.get_highest_id(path) + 1))
        self.create(new_path)
        return new_path

    def deposit(self, path="", content=None):
        """
        Deposit data provided as content in directory corresponding to path.

        Content is the byte representation of a ZIP archive containing the
        actual content. This byte representation is saved in a temporary
        file and afterwards unpacked in the directory corresponding to path.

        After depositing the content (including unzipping), the checksums in
        the manifest are checked for consistency with newly generated
        checksums, and the result of this check is returned.

        The temporary file is removed in any case, even if unpacking or
        reading the manifest fails.

        Parameters
        ----------
        path : :class:`str`
            path to deposit content to

        content : :class:`bytes`
            byte representation of a ZIP archive containing the contents to
            be extracted in the directory corresponding to path

        Returns
        -------
        integrity : :class:`dict`
            dict with fields ``data`` and ``all`` containing boolean values

            For details see :meth:`datasafe.manifest.Manifest.check_integrity`.

        Raises
        ------
        datasafe.exceptions.MissingPathError
            Raised if no path is provided

        datasafe.exceptions.MissingContentError
            Raised if no content is provided

        """
        if not path:
            raise MissingPathError(message="No path provided.")
        if not content:
            raise MissingContentError(
                message="No content provided to deposit."
            )
        # mkstemp returns an *open* file descriptor; wrap it rather than
        # opening the file a second time, so the descriptor gets closed.
        file_descriptor, archive_name = tempfile.mkstemp(suffix=".zip")
        try:
            with os.fdopen(file_descriptor, "wb") as file:
                file.write(content)
            shutil.unpack_archive(archive_name, self.working_path(path))
            with change_working_dir(self.working_path(path)):
                manifest = Manifest()
                # NOTE(review): uses the default manifest filename of the
                # Manifest class, not self.manifest_filename -- confirm
                # whether a configured filename should be honoured here.
                manifest.from_file(manifest.manifest_filename)
                integrity = manifest.check_integrity()
        finally:
            os.remove(archive_name)
        return integrity

    def retrieve(self, path=""):
        """
        Obtain data from directory corresponding to path

        The data are compressed as ZIP archive and the contents of the ZIP
        file is returned as bytes. The archive is created in a temporary
        directory that is removed afterwards.

        Parameters
        ----------
        path : :class:`str`
            path the data should be retrieved for

        Returns
        -------
        content : :class:`bytes`
            byte representation of a ZIP archive containing the contents of
            the directory corresponding to path

        Raises
        ------
        datasafe.exceptions.MissingPathError
            Raised if no path is provided

        OSError
            Raised if path does not exist

        """
        if not path:
            raise MissingPathError(message="No path provided.")
        with tempfile.TemporaryDirectory() as tmpdir:
            zip_archive = shutil.make_archive(
                base_name=os.path.join(tmpdir, "dataset"),
                format="zip",
                root_dir=self.working_path(path),
            )
            with open(zip_archive, "rb") as zip_file:
                return zip_file.read()

    def get_manifest(self, path=""):
        """
        Retrieve manifest of a dataset stored in path.

        Parameters
        ----------
        path : :class:`str`
            path to the dataset the manifest should be retrieved for

        Returns
        -------
        content : :class:`str`
            contents of the manifest file

        Raises
        ------
        datasafe.exceptions.MissingPathError
            Raised if no path is provided or the path does not exist

        datasafe.exceptions.MissingContentError
            Raised if the path does not contain a manifest file

        """
        # NOTE(review): ``path`` is used as given, not relative to
        # ``root_directory`` as in most other methods -- confirm intent.
        if not path:
            raise MissingPathError(message="No path provided.")
        if not os.path.exists(path):
            raise MissingPathError(message=f"Path {path} does not exist.")
        manifest_path = os.path.join(path, self.manifest_filename)
        if not os.path.exists(manifest_path):
            raise MissingContentError(message="No MANIFEST file found.")
        with open(manifest_path, "r", encoding="utf8") as file:
            return file.read()

    def get_index(self):
        """
        Return list of paths to datasets

        Such a list of paths to datasets is pretty useful if one intends to
        check locally for existing LOIs (corresponding to paths in the
        datasafe).

        If a path has been created already, but no data yet saved in there,
        as may happen during an experiment to reserve the corresponding LOI,
        this path will nevertheless be included.

        Returns
        -------
        paths : :class:`list`
            list of paths to datasets, relative to the root directory

        """
        top = self.root_directory or "."
        paths = []
        for root, dirs, _ in os.walk(top):
            for dir_ in dirs:
                full_path = os.path.join(root, dir_)
                entries = os.listdir(full_path)
                # A dataset directory is either still empty (reserved) or
                # contains a manifest file.
                if not entries or self.manifest_filename in entries:
                    # Strip only the leading root; a plain string replace
                    # would also remove later occurrences of the same text.
                    paths.append(os.path.relpath(full_path, top))
        return paths

    def check_integrity(self, path=""):
        """
        Check integrity of dataset, comparing stored with generated checksums.

        To check the integrity of a dataset, the checksums stored within the
        manifest file will be compared to newly generated checksums over
        data and metadata together as well as over data alone.

        Parameters
        ----------
        path : :class:`str`
            path to the dataset the integrity should be checked for

        Returns
        -------
        integrity : :class:`dict`
            dict with fields ``data`` and ``all`` containing boolean values

        Raises
        ------
        datasafe.exceptions.MissingContentError
            Raised if the path does not contain a manifest file

        """
        # NOTE(review): ``path`` is used as given, not relative to
        # ``root_directory`` as in most other methods -- confirm intent.
        if self.manifest_filename not in os.listdir(path):
            raise MissingContentError(message="No manifest file found.")
        manifest = Manifest()
        manifest.from_file(os.path.join(path, self.manifest_filename))
        return manifest.check_integrity()
def create_http_server(test_config=None):
    """
    Create a HTTP server for accessing the datasafe.

    Besides the dataset API served by :class:`HTTPServerAPI` below
    ``/api/<loi>``, two plain routes (``/heartbeat`` and ``/api/``) are
    registered that simply answer with ``alive``.

    Parameters
    ----------
    test_config : :class:`dict`
        Configuration for HTTP server

        Only applied if not empty.

    Returns
    -------
    app : :class:`flask.Flask`
        WSGI application created via flask

    """

    def heartbeat():
        return "alive"

    def api_test():
        return "alive"

    app = Flask(__name__)
    if test_config:
        app.config.from_mapping(test_config)

    # Endpoint names default to the names of the view functions.
    app.add_url_rule("/heartbeat", view_func=heartbeat)
    app.add_url_rule("/api/", view_func=api_test)
    app.add_url_rule(
        "/api/<path:loi>", view_func=HTTPServerAPI.as_view("datasets")
    )
    return app
class HTTPServerAPI(MethodView):
    """
    API view used in the HTTP server.

    The actual server is created via :func:`create_http_server` and operates
    via flask. This API view provides the actual API functionality to access
    the datasafe and its underlying storage backend via HTTP.

    The API provides methods for the HTTP methods, currently GET, POST, PUT,
    and PATCH.

    Furthermore, exceptions are converted into the appropriate HTTP status
    codes and the message of the exception is contained in the response
    body. Thus, clients such as :class:`datasafe.client.HTTPClient` can
    convert the HTTP status codes back into Python exceptions.

    Attributes
    ----------
    server : :class:`datasafe.server.Server`
        Server backend that communicates with the storage backend.

    """

    def __init__(self):
        self.server = Server()

    def get(self, loi=""):
        """
        Handle GET requests.

        The following responses are currently returned, depending on the
        status the request resulted in:

        ========= ==== ==============================
        Status    Code data
        ========= ==== ==============================
        success   200  dataset contents (ZIP archive)
        no data   204  message
        not found 404  error message
        invalid   404  error message
        ========= ==== ==============================

        The status "no data" results from querying a LOI that has been
        created (using POST), but no data uploaded to so far.

        The status "invalid" differs from "not found" in that the LOI
        requested is invalid.

        Parameters
        ----------
        loi : :class:`str`
            LOI of get request

        Returns
        -------
        response : :class:`tuple`
            Response body and status code, to be converted into a response
            object by flask

        """
        try:
            dataset = self.server.download(loi=loi)
        except MissingContentError:
            return "LOI does not have any content", 204
        except (LoiNotFoundError, InvalidLoiError) as error:
            return error.message, 404
        return dataset, 200

    def post(self, loi=""):
        """
        Handle POST requests.

        A POST request will only create a new empty resource connected to
        the LOI, but never upload data. For uploading, use PUT. While this
        may seem like not conforming to the typical usage of POST requests,
        the reason is simple: :meth:`post` returns the newly created LOI,
        while :meth:`put` returns the JSON representation of the integrity
        check dict. Hence, to be able to check that the data have
        successfully arrived at the datasafe storage backend,
        it is essential to separate POST and PUT requests.

        The following responses are currently returned, depending on the
        status the request resulted in:

        ========= ==== ==============================
        Status    Code data
        ========= ==== ==============================
        created   201  newly created LOI
        invalid   404  error message
        ========= ==== ==============================

        Parameters
        ----------
        loi : :class:`str`
            LOI of post request

        Returns
        -------
        response : :class:`tuple`
            Response body and status code, to be converted into a response
            object by flask

        """
        try:
            new_loi = self.server.new(loi=loi)
        except InvalidLoiError as error:
            return error.message, 404
        return new_loi, 201

    def put(self, loi=""):
        """
        Handle PUT requests.

        PUT requests are used to transfer data to an *existing* resource of
        the datasafe. To create a new resource, use :meth:`post` beforehand.
        If data exist already at the resource, this will result in an error
        (status code 405, see table below).

        The following responses are currently returned, depending on the
        status the request resulted in:

        ================ ==== ===========================================
        Status           Code data
        ================ ==== ===========================================
        success          200  JSON representation of integrity check dict
        does not exist   400  error message
        missing content  400  error message
        invalid          404  error message
        existing content 405  error message
        ================ ==== ===========================================

        The status "does not exist" refers to the LOI the data should be put
        to not existing (in this case, you need to first create it using
        POST). Therefore, in this particular case, status code 400 instead
        of 404 ("not found") is returned.

        The status "missing content" refers to the request missing data.

        The status "existing content" refers to data already present at the
        storage referred to with the LOI. As generally, you could update the
        content using another method, a status code 405 ("method not
        allowed") is returned in this case, together with an ``allow``
        header naming PATCH.

        Parameters
        ----------
        loi : :class:`str`
            LOI of put request

        Returns
        -------
        response : :class:`tuple`
            Response body, status code, and headers (``None`` unless status
            405), to be converted into a response object by flask

        """
        try:
            integrity = self.server.upload(loi=loi, content=request.data)
        except InvalidLoiError as error:
            return error.message, 404, None
        except (LoiNotFoundError, MissingContentError) as error:
            return error.message, 400, None
        except ExistingFileError as error:
            return error.message, 405, {"allow": "PATCH"}
        return integrity, 200, None

    def patch(self, loi=""):
        """
        Handle PATCH requests.

        PATCH requests are used to *update* data at an existing resource of
        the datasafe. To upload new data to an existing resource,
        use :meth:`put`. If no data exist at the resource, this will result
        in an error (status code 405, see table below).

        The following responses are currently returned, depending on the
        status the request resulted in:

        =================== ==== ===========================================
        Status              Code data
        =================== ==== ===========================================
        success             200  JSON representation of integrity check dict
        does not exist      400  error message
        missing content     400  error message
        invalid             404  error message
        no resource content 405  error message
        =================== ==== ===========================================

        The status "does not exist" refers to the LOI the data should be put
        to not existing (in this case, you need to first create it using
        POST). Therefore, in this particular case, status code 400 instead
        of 404 ("not found") is returned.

        The status "missing content" refers to the request missing data.

        The status "no resource content" refers to no data present at the
        storage referred to with the LOI. As generally, you could upload new
        content using another method, a status code 405 ("method not
        allowed") is returned in this case, together with an ``allow``
        header naming PUT.

        Parameters
        ----------
        loi : :class:`str`
            LOI of patch request

        Returns
        -------
        response : :class:`tuple`
            Response body, status code, and headers (``None`` unless status
            405), to be converted into a response object by flask

        """
        try:
            integrity = self.server.update(loi=loi, content=request.data)
        except InvalidLoiError as error:
            return error.message, 404, None
        except (MissingContentError, LoiNotFoundError) as error:
            return error.message, 400, None
        except NoFileError as error:
            return error.message, 405, {"allow": "PUT"}
        return integrity, 200, None