Base
pydatalab.blocks.base
¶
DataBlock
¶
Base class for a data block.
Source code in pydatalab/blocks/base.py
class DataBlock:
"""Base class for a data block."""
name: str
"""The human-readable block name specifying which technique
or file format it pertains to.
"""
blocktype: str = "generic"
"""A short (unique) string key specifying the type of block."""
description: str = "Generic Block"
"""A longer description outlining the purpose and capability
of the block."""
accepted_file_extensions: tuple[str, ...] | None
"""A list of file extensions that the block will attempt to read."""
defaults: Dict[str, Any] = {}
"""Any default values that should be set if they are not
supplied during block init.
"""
plot_functions: Optional[Sequence[Callable[[], None]]] = None
"""A list of methods that will generate plots for this block."""
_supports_collections: bool = False
"""Whether this datablock can operate on collection data, or just individual items"""
def __init__(
self,
item_id: Optional[str] = None,
collection_id: Optional[str] = None,
init_data=None,
unique_id=None,
):
"""Create a data block object for the given `item_id` or `collection_id`.
Parameters:
item_id: The item to which the block is attached, or
collection_id: The collection to which the block is attached.
init_data: A dictionary of data to initialise the block with.
unique_id: A unique id for the block, used in the DOM and database.
"""
if init_data is None:
init_data = {}
if item_id is None and not self._supports_collections:
raise RuntimeError(f"Must supply `item_id` to make {self.__class__.__name__}.")
if collection_id is not None and not self._supports_collections:
raise RuntimeError(
f"This block ({self.__class__.__name__}) does not support collections."
)
if item_id is not None and collection_id is not None:
raise RuntimeError("Must provide only one of `item_id` and `collection_id`.")
LOGGER.debug(
"Creating new block '%s' associated with item_id '%s'",
self.__class__.__name__,
item_id,
)
self.block_id = (
unique_id or generate_random_id()
) # this is supposed to be a unique id for use in html and the database.
self.data = {
"item_id": item_id,
"collection_id": collection_id,
"blocktype": self.blocktype,
"block_id": self.block_id,
**self.defaults,
}
# convert ObjectId file_ids to string to make handling them easier when sending to and from web
if "file_id" in self.data:
self.data["file_id"] = str(self.data["file_id"])
if "title" not in self.data:
self.data["title"] = self.name
self.data.update(
init_data
) # this could overwrite blocktype and block_id. I think that's reasonable... maybe
LOGGER.debug(
"Initialised block %s for item ID %s or collection ID %s.",
self.__class__.__name__,
item_id,
collection_id,
)
def to_db(self):
"""returns a dictionary with the data for this
block, ready to be input into mongodb"""
LOGGER.debug("Casting block %s to database object.", self.__class__.__name__)
if "bokeh_plot_data" in self.data:
self.data.pop("bokeh_plot_data")
if "file_id" in self.data:
dict_for_db = self.data.copy() # gross, I know
dict_for_db["file_id"] = ObjectId(dict_for_db["file_id"])
return dict_for_db
return self.data
@classmethod
def from_db(cls, db_entry):
"""create a block from json (dictionary) stored in a db"""
LOGGER.debug("Loading block %s from database object.", cls.__class__.__name__)
new_block = cls(
item_id=db_entry.get("item_id"),
collection_id=db_entry.get("collection_id"),
dictionary=db_entry,
)
if "file_id" in new_block.data:
new_block.data["file_id"] = str(new_block.data["file_id"])
if new_block.data.get("title", "") == new_block.description:
new_block.data["title"] = new_block.name
return new_block
def to_web(self) -> Dict[str, Any]:
"""Returns a JSON serializable dictionary to render the data block on the web."""
block_errors = []
block_warnings = []
if self.plot_functions:
for plot in self.plot_functions:
with warnings.catch_warnings(record=True) as captured_warnings:
try:
plot()
except Exception as e:
block_errors.append(f"{self.__class__.__name__} raised error: {e}")
LOGGER.warning(
f"Could not create plot for {self.__class__.__name__}: {self.data}"
)
finally:
if captured_warnings:
block_warnings.extend(
[
f"{self.__class__.__name__} raised warning: {w.message}"
for w in captured_warnings
]
)
# If the last plotting run did not raise any errors or warnings, remove any old ones
if block_errors:
self.data["errors"] = block_errors
else:
self.data.pop("errors", None)
if block_warnings:
self.data["warnings"] = block_warnings
else:
self.data.pop("warnings", None)
return self.data
@classmethod
def from_web(cls, data):
LOGGER.debug("Loading block %s from web request.", cls.__class__.__name__)
block = cls(
item_id=data.get("item_id"),
collection_id=data.get("collection_id"),
unique_id=data["block_id"],
)
block.update_from_web(data)
return block
def update_from_web(self, data):
"""update the object with data received from the website. Only updates fields
that are specified in the dictionary- other fields are left alone"""
LOGGER.debug(
"Updating block %s from web request",
self.__class__.__name__,
)
self.data.update(data)
return self
blocktype: str
¶
A short (unique) string key specifying the type of block.
defaults: Dict[str, Any]
¶
Any default values that should be set if they are not supplied during block init.
description: str
¶
A longer description outlining the purpose and capability of the block.
plot_functions: Optional[Sequence[Callable[[], NoneType]]]
¶
A list of methods that will generate plots for this block.
__init__(self, item_id: Optional[str] = None, collection_id: Optional[str] = None, init_data = None, unique_id = None)
special
¶
Create a data block object for the given `item_id` or `collection_id`.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
item_id | Optional[str] | The item to which the block is attached, or | None |
collection_id | Optional[str] | The collection to which the block is attached. | None |
init_data | | A dictionary of data to initialise the block with. | None |
unique_id | | A unique id for the block, used in the DOM and database. | None |
Source code in pydatalab/blocks/base.py
def __init__(
    self,
    item_id: Optional[str] = None,
    collection_id: Optional[str] = None,
    init_data=None,
    unique_id=None,
):
    """Create a data block object for the given `item_id` or `collection_id`.

    Parameters:
        item_id: The item to which the block is attached, or
        collection_id: The collection to which the block is attached.
        init_data: A dictionary of data to initialise the block with.
        unique_id: A unique id for the block, used in the DOM and database.

    Raises:
        RuntimeError: If `item_id` is missing or `collection_id` is given
            for a block that does not support collections, or if both
            ids are given at once.
    """
    if init_data is None:
        init_data = {}

    if item_id is None and not self._supports_collections:
        raise RuntimeError(f"Must supply `item_id` to make {self.__class__.__name__}.")

    if collection_id is not None and not self._supports_collections:
        raise RuntimeError(
            f"This block ({self.__class__.__name__}) does not support collections."
        )

    if item_id is not None and collection_id is not None:
        raise RuntimeError("Must provide only one of `item_id` and `collection_id`.")

    LOGGER.debug(
        "Creating new block '%s' associated with item_id '%s'",
        self.__class__.__name__,
        item_id,
    )

    # this is supposed to be a unique id for use in html and the database.
    self.block_id = unique_id or generate_random_id()

    self.data = {
        "item_id": item_id,
        "collection_id": collection_id,
        "blocktype": self.blocktype,
        "block_id": self.block_id,
        **self.defaults,
    }

    if "title" not in self.data:
        self.data["title"] = self.name

    # this could overwrite blocktype and block_id. I think that's reasonable... maybe
    self.data.update(init_data)

    # convert ObjectId file_ids to string to make handling them easier when
    # sending to and from web. Done after merging `init_data` so that a
    # `file_id` supplied by the caller is converted too, not only a default.
    if "file_id" in self.data:
        self.data["file_id"] = str(self.data["file_id"])

    LOGGER.debug(
        "Initialised block %s for item ID %s or collection ID %s.",
        self.__class__.__name__,
        item_id,
        collection_id,
    )
to_db(self)
¶
returns a dictionary with the data for this block, ready to be input into mongodb
Source code in pydatalab/blocks/base.py
def to_db(self):
    """Build the dictionary that gets written to mongodb for this block.

    Drops any `bokeh_plot_data` entry from `self.data`. If a `file_id`
    is present, a shallow copy with the id wrapped in `ObjectId` is
    returned; otherwise `self.data` itself is returned.
    """
    LOGGER.debug("Casting block %s to database object.", self.__class__.__name__)

    # plot data is not stored in the database
    self.data.pop("bokeh_plot_data", None)

    if "file_id" not in self.data:
        return self.data

    # work on a copy so `self.data` keeps the string form of the id
    db_record = self.data.copy()
    db_record["file_id"] = ObjectId(db_record["file_id"])
    return db_record
from_db(db_entry)
classmethod
¶
create a block from json (dictionary) stored in a db
Source code in pydatalab/blocks/base.py
@classmethod
def from_db(cls, db_entry):
    """Create a block from json (dictionary) stored in a db.

    Parameters:
        db_entry: The stored dictionary; its `item_id`, `collection_id`
            and `block_id` entries (if any) are passed to the constructor
            and the whole dictionary is used as `init_data`.

    Returns:
        The newly constructed block.
    """
    # `cls` is already the block class; `cls.__class__` would be its metaclass.
    LOGGER.debug("Loading block %s from database object.", cls.__name__)
    new_block = cls(
        item_id=db_entry.get("item_id"),
        collection_id=db_entry.get("collection_id"),
        # the constructor's keyword is `init_data`; there is no `dictionary` parameter
        init_data=db_entry,
        # keep the `block_id` attribute in sync with the stored `data["block_id"]`
        unique_id=db_entry.get("block_id"),
    )
    if "file_id" in new_block.data:
        new_block.data["file_id"] = str(new_block.data["file_id"])

    # a stored title equal to the class description is replaced by the block name
    if new_block.data.get("title", "") == new_block.description:
        new_block.data["title"] = new_block.name
    return new_block
to_web(self) -> Dict[str, Any]
¶
Returns a JSON serializable dictionary to render the data block on the web.
Source code in pydatalab/blocks/base.py
def to_web(self) -> Dict[str, Any]:
    """Returns a JSON serializable dictionary to render the data block on the web.

    Every callable in `plot_functions` is run; exceptions and warnings
    they raise are collected into `data["errors"]` and `data["warnings"]`
    rather than propagated.

    Returns:
        The block's `data` dictionary (the same object, not a copy).
    """
    block_errors = []
    block_warnings = []
    if self.plot_functions:
        for plot in self.plot_functions:
            with warnings.catch_warnings(record=True) as captured_warnings:
                try:
                    plot()
                except Exception as e:
                    block_errors.append(f"{self.__class__.__name__} raised error: {e}")
                    # lazy %-style arguments, matching the other LOGGER calls
                    LOGGER.warning(
                        "Could not create plot for %s: %s",
                        self.__class__.__name__,
                        self.data,
                    )
                finally:
                    if captured_warnings:
                        block_warnings.extend(
                            [
                                f"{self.__class__.__name__} raised warning: {w.message}"
                                for w in captured_warnings
                            ]
                        )

    # If the last plotting run did not raise any errors or warnings, remove any old ones
    if block_errors:
        self.data["errors"] = block_errors
    else:
        self.data.pop("errors", None)

    if block_warnings:
        self.data["warnings"] = block_warnings
    else:
        self.data.pop("warnings", None)

    return self.data
from_web(data)
classmethod
¶
Source code in pydatalab/blocks/base.py
@classmethod
def from_web(cls, data):
    """Create a block from a dictionary received from the website.

    Parameters:
        data: The request dictionary; must contain `block_id`, and may
            contain `item_id` and `collection_id`.

    Returns:
        The new block, updated with every entry of `data`.

    Raises:
        KeyError: If `data` has no `block_id` entry.
    """
    # `cls` is already the block class; `cls.__class__` would be its metaclass.
    LOGGER.debug("Loading block %s from web request.", cls.__name__)
    block = cls(
        item_id=data.get("item_id"),
        collection_id=data.get("collection_id"),
        unique_id=data["block_id"],
    )
    block.update_from_web(data)
    return block
update_from_web(self, data)
¶
update the object with data received from the website. Only updates fields that are specified in the dictionary- other fields are left alone
Source code in pydatalab/blocks/base.py
def update_from_web(self, data):
    """Merge a dictionary received from the website into this block.

    Only the keys present in `data` are overwritten in `self.data`;
    everything else is left untouched. Returns the block itself.
    """
    block_name = self.__class__.__name__
    LOGGER.debug("Updating block %s from web request", block_name)
    self.data.update(data)
    return self
generate_random_id()
¶
This function generates a random 15-length string for use as an id for a datablock. It should be sufficiently random that there is a negligible risk of ever generating the same id twice, so this is a unique id that can be used as a unique database reference and also can be used as id in the DOM. Note: uuid.uuid4() would do this too, but I think the generated ids are too long and ugly.
The ids here are HTML id friendly, using lowercase letters and numbers. The first character is always a letter.
Source code in pydatalab/blocks/base.py
def generate_random_id():
    """Generate a random 15-character string for use as a datablock id.

    The id is meant to be random enough that the risk of generating the
    same one twice is negligible, so it can serve both as a unique
    database reference and as an id in the DOM. (uuid.uuid4() would do
    this too, but the generated ids were judged too long and ugly.)

    The ids are HTML id friendly: lowercase letters and digits only, and
    the first character is always a letter.
    """
    letters = "abcdefghijklmnopqrstuvwxyz"
    letters_and_digits = "abcdefghijklmnopqrstuvwxyz0123456789"

    # one leading letter, followed by 14 characters that may also be digits
    leading = random.choice(letters)
    trailing = random.choices(letters_and_digits, k=14)
    return "".join([leading] + trailing)