Base
pydatalab.blocks.base
¶
DataBlock
¶
base class for a data block.
Source code in pydatalab/blocks/base.py
class DataBlock:
"""base class for a data block."""
blocktype: str = "generic"
description: str = "Generic Block"
accepted_file_extensions: Sequence[str]
# values that are set by default if they are not supplied by the dictionary in init()
defaults: Dict[str, Any] = {}
# values cached on the block instance for faster retrieval
cache: Optional[Dict[str, Any]] = None
plot_functions: Optional[Sequence[Callable[[], None]]] = None
# whether this datablock can operate on collection data, or just individual items
_supports_collections: bool = False
def __init__(
self,
item_id: Optional[str] = None,
collection_id: Optional[str] = None,
dictionary=None,
unique_id=None,
):
if dictionary is None:
dictionary = {}
if item_id is None and not self._supports_collections:
raise RuntimeError(f"Must supply `item_id` to make {self.__class__.__name__}.")
if collection_id is not None and not self._supports_collections:
raise RuntimeError(
f"This block ({self.__class__.__name__}) does not support collections."
)
if item_id is not None and collection_id is not None:
raise RuntimeError("Must provide only one of `item_id` and `collection_id`.")
# Initialise cache
self.cache = {}
LOGGER.debug(
"Creating new block '%s' associated with item_id '%s'",
self.__class__.__name__,
item_id,
)
self.block_id = (
unique_id or generate_random_id()
) # this is supposed to be a unique id for use in html and the database.
self.data = {
"item_id": item_id,
"collection_id": collection_id,
"blocktype": self.blocktype,
"block_id": self.block_id,
**self.defaults,
}
# convert ObjectId file_ids to string to make handling them easier when sending to and from web
if "file_id" in self.data:
self.data["file_id"] = str(self.data["file_id"])
if "title" not in self.data:
self.data["title"] = self.description
self.data.update(
dictionary
) # this could overwrite blocktype and block_id. I think that's reasonable... maybe
LOGGER.debug(
"Initialised block %s for item ID %s or collection ID %s.",
self.__class__.__name__,
item_id,
collection_id,
)
def to_db(self):
"""returns a dictionary with the data for this
block, ready to be input into mongodb"""
LOGGER.debug("Casting block %s to database object.", self.__class__.__name__)
if "bokeh_plot_data" in self.data:
self.data.pop("bokeh_plot_data")
if "file_id" in self.data:
dict_for_db = self.data.copy() # gross, I know
dict_for_db["file_id"] = ObjectId(dict_for_db["file_id"])
return dict_for_db
return self.data
@classmethod
def from_db(cls, db_entry):
"""create a block from json (dictionary) stored in a db"""
LOGGER.debug("Loading block %s from database object.", cls.__class__.__name__)
new_block = cls(
item_id=db_entry.get("item_id"),
collection_id=db_entry.get("collection_id"),
dictionary=db_entry,
)
if "file_id" in new_block.data:
new_block.data["file_id"] = str(new_block.data["file_id"])
return new_block
def to_web(self):
"""returns a json-able dictionary to render the block on the web"""
if self.plot_functions:
for plot in self.plot_functions:
with warnings.catch_warnings(record=True) as captured_warnings:
try:
plot()
except Exception as e:
if "errors" not in self.data:
self.data["errors"] = []
self.data["errors"].append(f"{self.__class__.__name__} raised error: {e}")
LOGGER.warning(
f"Could not create plot for {self.__class__.__name__}: {self.data}"
)
finally:
if captured_warnings:
if "warnings" not in self.data:
self.data["warnings"] = []
self.data["warnings"].extend(
[
f"{self.__class__.__name__} raised warning: {w.message}"
for w in captured_warnings
]
)
return self.data
@classmethod
def from_web(cls, data):
LOGGER.debug("Loading block %s from web request.", cls.__class__.__name__)
block = cls(
item_id=data.get("item_id"),
collection_id=data.get("collection_id"),
unique_id=data["block_id"],
)
block.update_from_web(data)
return block
def update_from_web(self, data):
"""update the object with data received from the website. Only updates fields
that are specified in the dictionary- other fields are left alone"""
LOGGER.debug(
"Updating block %s from web request",
self.__class__.__name__,
)
self.data.update(data)
return self
blocktype: str
¶
cache: Optional[Dict[str, Any]]
¶
defaults: Dict[str, Any]
¶
description: str
¶
plot_functions: Optional[Sequence[Callable[[], NoneType]]]
¶
__init__(self, item_id: Optional[str] = None, collection_id: Optional[str] = None, dictionary = None, unique_id = None)
special
¶
Source code in pydatalab/blocks/base.py
def __init__(
self,
item_id: Optional[str] = None,
collection_id: Optional[str] = None,
dictionary=None,
unique_id=None,
):
if dictionary is None:
dictionary = {}
if item_id is None and not self._supports_collections:
raise RuntimeError(f"Must supply `item_id` to make {self.__class__.__name__}.")
if collection_id is not None and not self._supports_collections:
raise RuntimeError(
f"This block ({self.__class__.__name__}) does not support collections."
)
if item_id is not None and collection_id is not None:
raise RuntimeError("Must provide only one of `item_id` and `collection_id`.")
# Initialise cache
self.cache = {}
LOGGER.debug(
"Creating new block '%s' associated with item_id '%s'",
self.__class__.__name__,
item_id,
)
self.block_id = (
unique_id or generate_random_id()
) # this is supposed to be a unique id for use in html and the database.
self.data = {
"item_id": item_id,
"collection_id": collection_id,
"blocktype": self.blocktype,
"block_id": self.block_id,
**self.defaults,
}
# convert ObjectId file_ids to string to make handling them easier when sending to and from web
if "file_id" in self.data:
self.data["file_id"] = str(self.data["file_id"])
if "title" not in self.data:
self.data["title"] = self.description
self.data.update(
dictionary
) # this could overwrite blocktype and block_id. I think that's reasonable... maybe
LOGGER.debug(
"Initialised block %s for item ID %s or collection ID %s.",
self.__class__.__name__,
item_id,
collection_id,
)
to_db(self)
¶
returns a dictionary with the data for this block, ready to be input into mongodb
Source code in pydatalab/blocks/base.py
def to_db(self):
"""returns a dictionary with the data for this
block, ready to be input into mongodb"""
LOGGER.debug("Casting block %s to database object.", self.__class__.__name__)
if "bokeh_plot_data" in self.data:
self.data.pop("bokeh_plot_data")
if "file_id" in self.data:
dict_for_db = self.data.copy() # gross, I know
dict_for_db["file_id"] = ObjectId(dict_for_db["file_id"])
return dict_for_db
return self.data
from_db(db_entry)
classmethod
¶
create a block from json (dictionary) stored in a db
Source code in pydatalab/blocks/base.py
@classmethod
def from_db(cls, db_entry):
"""create a block from json (dictionary) stored in a db"""
LOGGER.debug("Loading block %s from database object.", cls.__class__.__name__)
new_block = cls(
item_id=db_entry.get("item_id"),
collection_id=db_entry.get("collection_id"),
dictionary=db_entry,
)
if "file_id" in new_block.data:
new_block.data["file_id"] = str(new_block.data["file_id"])
return new_block
to_web(self)
¶
returns a json-able dictionary to render the block on the web
Source code in pydatalab/blocks/base.py
def to_web(self):
"""returns a json-able dictionary to render the block on the web"""
if self.plot_functions:
for plot in self.plot_functions:
with warnings.catch_warnings(record=True) as captured_warnings:
try:
plot()
except Exception as e:
if "errors" not in self.data:
self.data["errors"] = []
self.data["errors"].append(f"{self.__class__.__name__} raised error: {e}")
LOGGER.warning(
f"Could not create plot for {self.__class__.__name__}: {self.data}"
)
finally:
if captured_warnings:
if "warnings" not in self.data:
self.data["warnings"] = []
self.data["warnings"].extend(
[
f"{self.__class__.__name__} raised warning: {w.message}"
for w in captured_warnings
]
)
return self.data
from_web(data)
classmethod
¶
Source code in pydatalab/blocks/base.py
@classmethod
def from_web(cls, data):
LOGGER.debug("Loading block %s from web request.", cls.__class__.__name__)
block = cls(
item_id=data.get("item_id"),
collection_id=data.get("collection_id"),
unique_id=data["block_id"],
)
block.update_from_web(data)
return block
update_from_web(self, data)
¶
update the object with data received from the website. Only updates fields that are specified in the dictionary- other fields are left alone
Source code in pydatalab/blocks/base.py
def update_from_web(self, data):
"""update the object with data received from the website. Only updates fields
that are specified in the dictionary- other fields are left alone"""
LOGGER.debug(
"Updating block %s from web request",
self.__class__.__name__,
)
self.data.update(data)
return self
generate_random_id()
¶
This function generates a random 15-length string for use as an id for a datablock. It should be sufficiently random that there is a negligible risk of ever generating the same id twice, so this is a unique id that can be used as a unique database refrence and also can be used as id in the DOM. Note: uuid.uuid4() would do this too, but I think the generated ids are too long and ugly.
The ids here are HTML id friendly, using lowercase letters and numbers. The first character is always a letter.
Source code in pydatalab/blocks/base.py
def generate_random_id():
"""This function generates a random 15-length string for use as an id for a datablock. It
should be sufficiently random that there is a negligible risk of ever generating
the same id twice, so this is a unique id that can be used as a unique database refrence
and also can be used as id in the DOM. Note: uuid.uuid4() would do this too, but I think
the generated ids are too long and ugly.
The ids here are HTML id friendly, using lowercase letters and numbers. The first character
is always a letter.
"""
randlist = [random.choice("abcdefghijklmnopqrstuvwxyz")] + random.choices(
"abcdefghijklmnopqrstuvwxyz0123456789", k=14
)
return "".join(randlist)