Source code for bioblend.galaxy.libraries

"""
Contains possible interactions with the Galaxy Data Libraries
"""
import logging
import time
from typing import (
    Any,
    Dict,
    List,
    Optional,
    TYPE_CHECKING,
)

from typing_extensions import Literal

from bioblend.galaxy.client import Client
from bioblend.galaxy.datasets import (
    DatasetTimeoutException,
    TERMINAL_STATES,
)
from bioblend.util import attach_file

if TYPE_CHECKING:
    from bioblend.galaxy import GalaxyInstance

LinkDataOnly = Literal["copy_files", "link_to_files"]

log = logging.getLogger(__name__)


[docs]class LibraryClient(Client): module = "libraries" def __init__(self, galaxy_instance: "GalaxyInstance") -> None: super().__init__(galaxy_instance)
[docs] def create_library( self, name: str, description: Optional[str] = None, synopsis: Optional[str] = None ) -> Dict[str, Any]: """ Create a data library with the properties defined in the arguments. :type name: str :param name: Name of the new data library :type description: str :param description: Optional data library description :type synopsis: str :param synopsis: Optional data library synopsis :rtype: dict :return: Details of the created library. For example:: {'id': 'f740ab636b360a70', 'name': 'Library from bioblend', 'url': '/api/libraries/f740ab636b360a70'} """ payload = {"name": name} if description: payload["description"] = description if synopsis: payload["synopsis"] = synopsis return self._post(payload)
[docs] def delete_library(self, library_id: str) -> Dict[str, Any]: """ Delete a data library. :type library_id: str :param library_id: Encoded data library ID identifying the library to be deleted :rtype: dict :return: Information about the deleted library .. warning:: Deleting a data library is irreversible - all of the data from the library will be permanently deleted. """ return self._delete(id=library_id)
def _show_item(self, library_id: str, item_id: str) -> Dict[str, Any]: """ Get details about a given library item. """ url = "/".join((self._make_url(library_id, contents=True), item_id)) return self._get(url=url)
[docs] def delete_library_dataset(self, library_id: str, dataset_id: str, purged: bool = False) -> Dict[str, Any]: """ Delete a library dataset in a data library. :type library_id: str :param library_id: library id where dataset is found in :type dataset_id: str :param dataset_id: id of the dataset to be deleted :type purged: bool :param purged: Indicate that the dataset should be purged (permanently deleted) :rtype: dict :return: A dictionary containing the dataset id and whether the dataset has been deleted. For example:: {'deleted': True, 'id': '60e680a037f41974'} """ url = "/".join((self._make_url(library_id, contents=True), dataset_id)) return self._delete(payload={"purged": purged}, url=url)
[docs] def update_library_dataset(self, dataset_id: str, **kwargs: Any) -> Dict[str, Any]: """ Update library dataset metadata. Some of the attributes that can be modified are documented below. :type dataset_id: str :param dataset_id: id of the dataset to be updated :type name: str :param name: Replace library dataset name with the given string :type misc_info: str :param misc_info: Replace library dataset misc_info with given string :type file_ext: str :param file_ext: Replace library dataset extension (must exist in the Galaxy registry) :type genome_build: str :param genome_build: Replace library dataset genome build (dbkey) :type tags: list :param tags: Replace library dataset tags with the given list :rtype: dict :return: details of the updated dataset """ url = "/".join((self._make_url(), "datasets", dataset_id)) return self._patch(payload=kwargs, url=url)
[docs] def show_dataset(self, library_id: str, dataset_id: str) -> Dict[str, Any]: """ Get details about a given library dataset. The required ``library_id`` can be obtained from the datasets's library content details. :type library_id: str :param library_id: library id where dataset is found in :type dataset_id: str :param dataset_id: id of the dataset to be inspected :rtype: dict :return: A dictionary containing information about the dataset in the library """ return self._show_item(library_id, dataset_id)
[docs] def wait_for_dataset( self, library_id: str, dataset_id: str, maxwait: float = 12000, interval: float = 3 ) -> Dict[str, Any]: """ Wait until the library dataset state is terminal ('ok', 'empty', 'error', 'discarded' or 'failed_metadata'). :type library_id: str :param library_id: library id where dataset is found in :type dataset_id: str :param dataset_id: id of the dataset to wait for :type maxwait: float :param maxwait: Total time (in seconds) to wait for the dataset state to become terminal. If the dataset state is not terminal within this time, a ``DatasetTimeoutException`` will be thrown. :type interval: float :param interval: Time (in seconds) to wait between 2 consecutive checks. :rtype: dict :return: A dictionary containing information about the dataset in the library """ assert maxwait >= 0 assert interval > 0 time_left = maxwait while True: dataset = self.show_dataset(library_id, dataset_id) state = dataset["state"] if state in TERMINAL_STATES: return dataset if time_left > 0: log.info( "Dataset %s in library %s is in non-terminal state %s. Will wait %i more s", dataset_id, library_id, state, time_left, ) time.sleep(min(time_left, interval)) time_left -= interval else: raise DatasetTimeoutException( f"Dataset {dataset_id} in library {library_id} is still in non-terminal state {state} after {maxwait} s" )
[docs] def show_folder(self, library_id: str, folder_id: str) -> Dict[str, Any]: """ Get details about a given folder. The required ``folder_id`` can be obtained from the folder's library content details. :type library_id: str :param library_id: library id to inspect folders in :type folder_id: str :param folder_id: id of the folder to be inspected :rtype: dict :return: Information about the folder """ return self._show_item(library_id, folder_id)
def _get_root_folder_id(self, library_id: str) -> str: """ Find the root folder (i.e. '/') of a library. :type library_id: str :param library_id: library id to find root of """ library_dict = self.show_library(library_id=library_id) return library_dict["root_folder_id"]
[docs] def create_folder( self, library_id: str, folder_name: str, description: Optional[str] = None, base_folder_id: Optional[str] = None ) -> List[Dict[str, Any]]: """ Create a folder in a library. :type library_id: str :param library_id: library id to use :type folder_name: str :param folder_name: name of the new folder in the data library :type description: str :param description: description of the new folder in the data library :type base_folder_id: str :param base_folder_id: id of the folder where to create the new folder. If not provided, the root folder will be used :rtype: list :return: List with a single dictionary containing information about the new folder """ # Get root folder ID if no ID was provided if base_folder_id is None: base_folder_id = self._get_root_folder_id(library_id) # Compose the payload payload = { "name": folder_name, "folder_id": base_folder_id, "create_type": "folder", } if description is not None: payload["description"] = description return self._post(payload, id=library_id, contents=True)
[docs] def get_folders( self, library_id: str, folder_id: Optional[str] = None, name: Optional[str] = None ) -> List[Dict[str, Any]]: """ Get all the folders in a library, or select a subset by specifying a folder name for filtering. :type library_id: str :param library_id: library id to use :type name: str :param name: Folder name to filter on. For ``name`` specify the full path of the folder starting from the library's root folder, e.g. ``/subfolder/subsubfolder``. :rtype: list :return: list of dicts each containing basic information about a folder .. versionchanged:: 1.1.1 Using the deprecated ``folder_id`` parameter now raises a ``ValueError`` exception. """ if folder_id is not None: raise ValueError( "The folder_id parameter has been removed, use the show_folder() method to view details of a folder for which you know the ID." ) library_contents = self.show_library(library_id=library_id, contents=True) if name is not None: folders = [_ for _ in library_contents if _["type"] == "folder" and _["name"] == name] else: folders = [_ for _ in library_contents if _["type"] == "folder"] return folders
[docs] def get_libraries( self, library_id: Optional[str] = None, name: Optional[str] = None, deleted: Optional[bool] = False ) -> List[Dict[str, Any]]: """ Get all libraries, or select a subset by specifying optional arguments for filtering (e.g. a library name). :type name: str :param name: Library name to filter on. :type deleted: bool :param deleted: If ``False`` (the default), return only non-deleted libraries. If ``True``, return only deleted libraries. If ``None``, return both deleted and non-deleted libraries. :rtype: list :return: list of dicts each containing basic information about a library .. versionchanged:: 1.1.1 Using the deprecated ``library_id`` parameter now raises a ``ValueError`` exception. """ if library_id is not None: raise ValueError( "The library_id parameter has been removed, use the show_library() method to view details of a library for which you know the ID." ) libraries = self._get(params={"deleted": deleted}) if name is not None: libraries = [_ for _ in libraries if _["name"] == name] return libraries
[docs] def show_library(self, library_id: str, contents: bool = False) -> Dict[str, Any]: """ Get information about a library. :type library_id: str :param library_id: filter for library by library id :type contents: bool :param contents: whether to get contents of the library (rather than just the library details) :rtype: dict :return: details of the given library """ return self._get(id=library_id, contents=contents)
def _do_upload(self, library_id: str, **kwargs: Any) -> List[Dict[str, Any]]: """ Set up the POST request and do the actual data upload to a data library. This method should not be called directly but instead refer to the methods specific for the desired type of data upload. """ folder_id = kwargs.get("folder_id") if folder_id is None: folder_id = self._get_root_folder_id(library_id) files_attached = False # Compose the payload dict payload = { "folder_id": folder_id, "file_type": kwargs.get("file_type", "auto"), "dbkey": kwargs.get("dbkey", "?"), "create_type": "file", "tag_using_filenames": kwargs.get("tag_using_filenames", False), "preserve_dirs": kwargs.get("preserve_dirs", False), } if kwargs.get("roles"): payload["roles"] = kwargs["roles"] if kwargs.get("link_data_only") and kwargs["link_data_only"] != "copy_files": payload["link_data_only"] = "link_to_files" if kwargs.get("tags"): payload["tags"] = kwargs["tags"] # upload options if kwargs.get("file_url") is not None: payload["upload_option"] = "upload_file" payload["files_0|url_paste"] = kwargs["file_url"] elif kwargs.get("pasted_content") is not None: payload["upload_option"] = "upload_file" payload["files_0|url_paste"] = kwargs["pasted_content"] elif kwargs.get("server_dir") is not None: payload["upload_option"] = "upload_directory" payload["server_dir"] = kwargs["server_dir"] elif kwargs.get("file_local_path") is not None: payload["upload_option"] = "upload_file" payload["files_0|file_data"] = attach_file(kwargs["file_local_path"]) files_attached = True elif kwargs.get("filesystem_paths") is not None: payload["upload_option"] = "upload_paths" payload["filesystem_paths"] = kwargs["filesystem_paths"] try: return self._post(payload, id=library_id, contents=True, files_attached=files_attached) finally: if payload.get("files_0|file_data") is not None: payload["files_0|file_data"].close()
[docs] def upload_file_from_url( self, library_id: str, file_url: str, folder_id: Optional[str] = None, file_type: str = "auto", dbkey: str = "?", tags: Optional[List[str]] = None, ) -> List[Dict[str, Any]]: """ Upload a file to a library from a URL. :type library_id: str :param library_id: id of the library where to place the uploaded file :type file_url: str :param file_url: URL of the file to upload :type folder_id: str :param folder_id: id of the folder where to place the uploaded file. If not provided, the root folder will be used :type file_type: str :param file_type: Galaxy file format name :type dbkey: str :param dbkey: Dbkey :type tags: list :param tags: A list of tags to add to the datasets :rtype: list :return: List with a single dictionary containing information about the LDDA """ return self._do_upload( library_id, file_url=file_url, folder_id=folder_id, file_type=file_type, dbkey=dbkey, tags=tags )
[docs] def upload_file_contents( self, library_id: str, pasted_content: str, folder_id: Optional[str] = None, file_type: str = "auto", dbkey: str = "?", tags: Optional[List[str]] = None, ) -> List[Dict[str, Any]]: """ Upload pasted_content to a data library as a new file. :type library_id: str :param library_id: id of the library where to place the uploaded file :type pasted_content: str :param pasted_content: Content to upload into the library :type folder_id: str :param folder_id: id of the folder where to place the uploaded file. If not provided, the root folder will be used :type file_type: str :param file_type: Galaxy file format name :type dbkey: str :param dbkey: Dbkey :type tags: list :param tags: A list of tags to add to the datasets :rtype: list :return: List with a single dictionary containing information about the LDDA """ return self._do_upload( library_id, pasted_content=pasted_content, folder_id=folder_id, file_type=file_type, dbkey=dbkey, tags=tags )
[docs] def upload_file_from_local_path( self, library_id: str, file_local_path: str, folder_id: Optional[str] = None, file_type: str = "auto", dbkey: str = "?", tags: Optional[List[str]] = None, ) -> List[Dict[str, Any]]: """ Read local file contents from file_local_path and upload data to a library. :type library_id: str :param library_id: id of the library where to place the uploaded file :type file_local_path: str :param file_local_path: path of local file to upload :type folder_id: str :param folder_id: id of the folder where to place the uploaded file. If not provided, the root folder will be used :type file_type: str :param file_type: Galaxy file format name :type dbkey: str :param dbkey: Dbkey :type tags: list :param tags: A list of tags to add to the datasets :rtype: list :return: List with a single dictionary containing information about the LDDA """ return self._do_upload( library_id, file_local_path=file_local_path, folder_id=folder_id, file_type=file_type, dbkey=dbkey, tags=tags, )
[docs] def upload_file_from_server( self, library_id: str, server_dir: str, folder_id: Optional[str] = None, file_type: str = "auto", dbkey: str = "?", link_data_only: Optional[LinkDataOnly] = None, roles: str = "", preserve_dirs: bool = False, tag_using_filenames: bool = False, tags: Optional[List[str]] = None, ) -> List[Dict[str, Any]]: """ Upload all files in the specified subdirectory of the Galaxy library import directory to a library. :type library_id: str :param library_id: id of the library where to place the uploaded file :type server_dir: str :param server_dir: relative path of the subdirectory of ``library_import_dir`` to upload. All and only the files (i.e. no subdirectories) contained in the specified directory will be uploaded :type folder_id: str :param folder_id: id of the folder where to place the uploaded files. If not provided, the root folder will be used :type file_type: str :param file_type: Galaxy file format name :type dbkey: str :param dbkey: Dbkey :type link_data_only: str :param link_data_only: either 'copy_files' (default) or 'link_to_files'. Setting to 'link_to_files' symlinks instead of copying the files :type roles: str :param roles: ??? :type preserve_dirs: bool :param preserve_dirs: Indicate whether to preserve the directory structure when importing dir :type tag_using_filenames: bool :param tag_using_filenames: Indicate whether to generate dataset tags from filenames. .. versionchanged:: 0.14.0 Changed the default from ``True`` to ``False``. :type tags: list :param tags: A list of tags to add to the datasets :rtype: list :return: List with a single dictionary containing information about the LDDA .. note:: This method works only if the Galaxy instance has the ``library_import_dir`` option configured in the ``config/galaxy.yml`` configuration file. """ return self._do_upload( library_id, server_dir=server_dir, folder_id=folder_id, file_type=file_type, dbkey=dbkey, link_data_only=link_data_only, roles=roles, preserve_dirs=preserve_dirs, tag_using_filenames=tag_using_filenames, tags=tags, )
[docs] def upload_from_galaxy_filesystem( self, library_id: str, filesystem_paths: str, folder_id: Optional[str] = None, file_type: str = "auto", dbkey: str = "?", link_data_only: Optional[LinkDataOnly] = None, roles: str = "", preserve_dirs: bool = False, tag_using_filenames: bool = False, tags: Optional[List[str]] = None, ) -> List[Dict[str, Any]]: """ Upload a set of files already present on the filesystem of the Galaxy server to a library. :type library_id: str :param library_id: id of the library where to place the uploaded file :type filesystem_paths: str :param filesystem_paths: file paths on the Galaxy server to upload to the library, one file per line :type folder_id: str :param folder_id: id of the folder where to place the uploaded files. If not provided, the root folder will be used :type file_type: str :param file_type: Galaxy file format name :type dbkey: str :param dbkey: Dbkey :type link_data_only: str :param link_data_only: either 'copy_files' (default) or 'link_to_files'. Setting to 'link_to_files' symlinks instead of copying the files :type roles: str :param roles: ??? :type preserve_dirs: bool :param preserve_dirs: Indicate whether to preserve the directory structure when importing dir :type tag_using_filenames: bool :param tag_using_filenames: Indicate whether to generate dataset tags from filenames. .. versionchanged:: 0.14.0 Changed the default from ``True`` to ``False``. :type tags: list :param tags: A list of tags to add to the datasets :rtype: list :return: List of dictionaries containing information about each uploaded LDDA. .. note:: This method works only if the Galaxy instance has the ``allow_path_paste`` option set to ``true`` in the ``config/galaxy.yml`` configuration file. """ return self._do_upload( library_id, filesystem_paths=filesystem_paths, folder_id=folder_id, file_type=file_type, dbkey=dbkey, link_data_only=link_data_only, roles=roles, preserve_dirs=preserve_dirs, tag_using_filenames=tag_using_filenames, tags=tags, )
[docs] def copy_from_dataset( self, library_id: str, dataset_id: str, folder_id: Optional[str] = None, message: str = "" ) -> Dict[str, Any]: """ Copy a Galaxy dataset into a library. :type library_id: str :param library_id: id of the library where to place the uploaded file :type dataset_id: str :param dataset_id: id of the dataset to copy from :type folder_id: str :param folder_id: id of the folder where to place the uploaded files. If not provided, the root folder will be used :type message: str :param message: message for copying action :rtype: dict :return: LDDA information """ if folder_id is None: folder_id = self._get_root_folder_id(library_id) payload = { "folder_id": folder_id, "create_type": "file", "from_hda_id": dataset_id, "ldda_message": message, } return self._post(payload, id=library_id, contents=True)
[docs] def get_library_permissions(self, library_id: str) -> Dict[str, Any]: """ Get the permissions for a library. :type library_id: str :param library_id: id of the library :rtype: dict :return: dictionary with all applicable permissions' values """ url = self._make_url(library_id) + "/permissions" return self._get(url=url)
[docs] def get_dataset_permissions(self, dataset_id: str) -> Dict[str, Any]: """ Get the permissions for a dataset. :type dataset_id: str :param dataset_id: id of the dataset :rtype: dict :return: dictionary with all applicable permissions' values """ url = "/".join((self._make_url(), "datasets", dataset_id, "permissions")) return self._get(url=url)
[docs] def set_library_permissions( self, library_id: str, access_in: Optional[List[str]] = None, modify_in: Optional[List[str]] = None, add_in: Optional[List[str]] = None, manage_in: Optional[List[str]] = None, ) -> Dict[str, Any]: """ Set the permissions for a library. Note: it will override all security for this library even if you leave out a permission type. :type library_id: str :param library_id: id of the library :type access_in: list :param access_in: list of role ids :type modify_in: list :param modify_in: list of role ids :type add_in: list :param add_in: list of role ids :type manage_in: list :param manage_in: list of role ids :rtype: dict :return: General information about the library """ payload: Dict[str, List[str]] = {} if access_in: payload["LIBRARY_ACCESS_in"] = access_in if modify_in: payload["LIBRARY_MODIFY_in"] = modify_in if add_in: payload["LIBRARY_ADD_in"] = add_in if manage_in: payload["LIBRARY_MANAGE_in"] = manage_in url = self._make_url(library_id) + "/permissions" return self._post(payload, url=url)
[docs] def set_dataset_permissions( self, dataset_id: str, access_in: Optional[List[str]] = None, modify_in: Optional[List[str]] = None, manage_in: Optional[List[str]] = None, ) -> Dict[str, Any]: """ Set the permissions for a dataset. Note: it will override all security for this dataset even if you leave out a permission type. :type dataset_id: str :param dataset_id: id of the dataset :type access_in: list :param access_in: list of role ids :type modify_in: list :param modify_in: list of role ids :type manage_in: list :param manage_in: list of role ids :rtype: dict :return: dictionary with all applicable permissions' values """ # we need here to define an action payload: Dict[str, Any] = { "action": "set_permissions", } if access_in: payload["access_ids[]"] = access_in if modify_in: payload["modify_ids[]"] = modify_in if manage_in: payload["manage_ids[]"] = manage_in url = "/".join((self._make_url(), "datasets", dataset_id, "permissions")) return self._post(payload, url=url)