Skip to content

Common

pydatalab.blocks.common

EXCEL_LIKE_EXTENSIONS

NotSupportedBlock (DataBlock)

Source code in pydatalab/blocks/common.py
class NotSupportedBlock(DataBlock):
    """Placeholder block used when the requested block type is not
    supported by the current version of the server.

    """

    # Short key identifying this block type.
    blocktype = "notsupported"
    # Human-readable name of the block.
    name = "Not Supported"
    # Longer description of the block's purpose.
    description = "A placeholder block type when the requested block is not supported by the current version of the server."

blocktype: str

A short (unique) string key specifying the type of block.

description: str

A longer description outlining the purpose and capability of the block.

name: str

The human-readable block name specifying which technique or file format it pertains to.

CommentBlock (DataBlock)

Source code in pydatalab/blocks/common.py
class CommentBlock(DataBlock):
    """Block that adds a rich text comment to the document."""

    # Short key identifying this block type.
    blocktype = "comment"
    # Human-readable name of the block.
    name = "Comment"
    # Longer description of the block's purpose.
    description = "Add a rich text comment to the document."
    # NOTE(review): presumably marks the block as usable on collections; the
    # flag is interpreted by code outside this class -- confirm there.
    _supports_collections = True

blocktype: str

A short (unique) string key specifying the type of block.

description: str

A longer description outlining the purpose and capability of the block.

name: str

The human-readable block name specifying which technique or file format it pertains to.

MediaBlock (DataBlock)

Source code in pydatalab/blocks/common.py
class MediaBlock(DataBlock):
    """Block that displays an image or a video of a supported format.

    TIFF images are additionally re-encoded as PNG and stored base64-encoded
    in `self.data["b64_encoded_image"]`, keyed by file ID.

    """

    name = "Media"
    blocktype = "media"
    description = "Display an image or a video of a supported format."
    accepted_file_extensions = (".png", ".jpeg", ".jpg", ".tif", ".tiff", ".mp4", ".mov", ".webm")
    # NOTE(review): presumably marks the block as not usable on collections;
    # the flag is interpreted by code outside this class -- confirm there.
    _supports_collections = False

    @property
    def plot_functions(self):
        """The processing callables for this block: only `encode_tiff`."""
        return (self.encode_tiff,)

    def encode_tiff(self):
        """Store a base64-encoded PNG rendering of an attached TIFF file.

        Does nothing (beyond logging a warning) if no `file_id` is set in
        `self.data`. Otherwise ensures `self.data["b64_encoded_image"]`
        exists and, if the attached file's name ends in `.tif` or `.tiff`
        (case-insensitively), converts the image to PNG and stores the
        base64 string under the file ID.

        """
        if "file_id" not in self.data:
            LOGGER.warning("MediaBlock.encode_tiff(): No file set in the DataBlock")
            return

        encoded_images = self.data.setdefault("b64_encoded_image", {})
        file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True)

        # Compare case-insensitively so that e.g. `scan.TIF` is also converted.
        if file_info["name"].lower().endswith((".tif", ".tiff")):
            # Open the image as a context manager so the underlying file
            # handle is released as soon as the conversion is done.
            with Image.open(file_info["location"]) as im, io.BytesIO() as f:
                LOGGER.warning("Making base64 encoding of tif")
                im.save(f, format="PNG")
                encoded_images[self.data["file_id"]] = base64.b64encode(f.getvalue()).decode()

accepted_file_extensions: tuple[str, ...] | None

A tuple of file extensions that the block will attempt to read.

blocktype: str

A short (unique) string key specifying the type of block.

description: str

A longer description outlining the purpose and capability of the block.

name: str

The human-readable block name specifying which technique or file format it pertains to.

plot_functions property readonly

encode_tiff(self)

Source code in pydatalab/blocks/common.py
def encode_tiff(self):
    """Store a base64-encoded PNG rendering of an attached TIFF file.

    Does nothing (beyond logging a warning) if no `file_id` is set in
    `self.data`. Otherwise ensures `self.data["b64_encoded_image"]`
    exists and, if the attached file's name ends in `.tif` or `.tiff`
    (case-insensitively), converts the image to PNG and stores the
    base64 string under the file ID.

    """
    if "file_id" not in self.data:
        LOGGER.warning("MediaBlock.encode_tiff(): No file set in the DataBlock")
        return

    encoded_images = self.data.setdefault("b64_encoded_image", {})
    file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True)

    # Compare case-insensitively so that e.g. `scan.TIF` is also converted.
    if file_info["name"].lower().endswith((".tif", ".tiff")):
        # Open the image as a context manager so the underlying file
        # handle is released as soon as the conversion is done.
        with Image.open(file_info["location"]) as im, io.BytesIO() as f:
            LOGGER.warning("Making base64 encoding of tif")
            im.save(f, format="PNG")
            encoded_images[self.data["file_id"]] = base64.b64encode(f.getvalue()).decode()

TabularDataBlock (DataBlock)

This block simply tries to read the given file with pandas, and expose an interface to plot its columns as scatter points.

Source code in pydatalab/blocks/common.py
class TabularDataBlock(DataBlock):
    """This block simply tries to read the given file with pandas, and
    expose an interface to plot its columns as scatter points.

    """

    blocktype = "tabular"
    name = "Tabular Data Block"
    description = "This block will load tabular data from common plain text files and Excel-like spreadsheets and allow you to create simple scatter plots of the columns within."
    accepted_file_extensions = (".csv", ".txt", ".tsv", ".dat", *EXCEL_LIKE_EXTENSIONS)

    @property
    def plot_functions(self):
        """The plotting callables for this block: only `plot_df`."""
        return (self.plot_df,)

    def _load(self) -> pd.DataFrame | None:
        """Load the file attached to this block, if any.

        Returns:
            The dataframe read from the attached file, or `None` if no
            `file_id` is set in `self.data`.

        """
        if "file_id" not in self.data:
            return None

        file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True)

        return self.load(file_info["location"])

    @classmethod
    def load(cls, location: Path | str) -> pd.DataFrame:
        """Read a tabular file into a dataframe with pandas.

        If the file has an Excel-like extension, read it with
        `pandas.read_excel()`; when the workbook has several sheets, only
        the first one is used and a warning is emitted.
        Otherwise, read it with `pandas.read_csv()` using `sep=None`; if the
        result contains any missing values, re-read the file with a less
        strict parser (no header row, `#` treated as a comment marker).

        Args:
            location: Path to the file to read.

        Returns:
            pd.DataFrame: The loaded dataframe.

        Raises:
            RuntimeError: If the pandas reader fails; the original exception
                is chained as the cause.

        """
        if not isinstance(location, Path):
            location = Path(location)

        if location.suffix in EXCEL_LIKE_EXTENSIONS:
            try:
                # `sheet_name=None` loads every sheet into a dict of dataframes.
                df_dict = pd.read_excel(location, sheet_name=None)
            except Exception as e:
                raise RuntimeError(
                    f"`pandas.read_excel()` was not able to read the file. Error: {e}"
                ) from e

            df = next(iter(df_dict.values()))
            if len(df_dict) > 1:
                warnings.warn(
                    f"Found multiple sheets in spreadsheet file {df_dict.keys()}, only using the first one."
                )

            return df

        try:
            df = pd.read_csv(
                location,
                sep=None,
                encoding_errors="backslashreplace",
                skip_blank_lines=False,
                engine="python",
            )

            # Missing values in the first pass trigger a second, less strict,
            # header-less parse of the same file.
            if df.isnull().values.any():
                warnings.warn(
                    f"Loading file with less strict parser: columns were previously detected as {df.columns}"
                )
                df = pd.read_csv(
                    location,
                    sep=None,
                    names=range(df.shape[1]),
                    comment="#",
                    header=None,
                    encoding_errors="backslashreplace",
                    skip_blank_lines=False,
                    engine="python",
                )
                # With `axis=1` this drops every *column* that contains at
                # least one NaN.
                # NOTE(review): confirm this axis is intended, rather than
                # dropping rows that are entirely NaN.
                df.dropna(axis=1, inplace=True)
        except Exception as e:
            raise RuntimeError(
                f"`pandas.read_csv()` was not able to read the file. Error: {e}"
            ) from e

        return df

    def plot_df(self):
        """Build a scatter plot of the loaded dataframe and store it as
        bokeh JSON under `self.data["bokeh_plot_data"]`.

        Does nothing if no file is attached to the block.

        """
        import bokeh.embed

        from pydatalab.bokeh_plots import DATALAB_BOKEH_THEME, selectable_axes_plot

        df = self._load()
        if df is None:
            return
        plot = selectable_axes_plot(
            df,
            plot_points=True,
            plot_line=False,
            show_table=True,
        )

        self.data["bokeh_plot_data"] = bokeh.embed.json_item(plot, theme=DATALAB_BOKEH_THEME)

accepted_file_extensions: tuple[str, ...] | None

A tuple of file extensions that the block will attempt to read.

blocktype: str

A short (unique) string key specifying the type of block.

description: str

A longer description outlining the purpose and capability of the block.

name: str

The human-readable block name specifying which technique or file format it pertains to.

plot_functions property readonly

load(location: pathlib.Path | str) -> DataFrame classmethod

Throw several pandas readers at the target file.

If the file has an Excel-like extension, read it with pandas.read_excel(), using only the first sheet. Otherwise, read it with pandas.read_csv() with automatic delimiter detection; if the result contains any missing values, re-read it with a less strict, header-less parser.

Returns:

Type Description
pd.DataFrame

The loaded dataframe.

Source code in pydatalab/blocks/common.py
@classmethod
def load(cls, location: Path | str) -> pd.DataFrame:
    """Read a tabular file into a dataframe with pandas.

    If the file has an Excel-like extension, read it with
    `pandas.read_excel()`; when the workbook has several sheets, only
    the first one is used and a warning is emitted.
    Otherwise, read it with `pandas.read_csv()` using `sep=None`; if the
    result contains any missing values, re-read the file with a less
    strict parser (no header row, `#` treated as a comment marker).

    Args:
        location: Path to the file to read.

    Returns:
        pd.DataFrame: The loaded dataframe.

    Raises:
        RuntimeError: If the pandas reader fails; the original exception
            is chained as the cause.

    """
    if not isinstance(location, Path):
        location = Path(location)

    if location.suffix in EXCEL_LIKE_EXTENSIONS:
        try:
            # `sheet_name=None` loads every sheet into a dict of dataframes.
            df_dict = pd.read_excel(location, sheet_name=None)
        except Exception as e:
            raise RuntimeError(
                f"`pandas.read_excel()` was not able to read the file. Error: {e}"
            ) from e

        df = next(iter(df_dict.values()))
        if len(df_dict) > 1:
            warnings.warn(
                f"Found multiple sheets in spreadsheet file {df_dict.keys()}, only using the first one."
            )

        return df

    try:
        df = pd.read_csv(
            location,
            sep=None,
            encoding_errors="backslashreplace",
            skip_blank_lines=False,
            engine="python",
        )

        # Missing values in the first pass trigger a second, less strict,
        # header-less parse of the same file.
        if df.isnull().values.any():
            warnings.warn(
                f"Loading file with less strict parser: columns were previously detected as {df.columns}"
            )
            df = pd.read_csv(
                location,
                sep=None,
                names=range(df.shape[1]),
                comment="#",
                header=None,
                encoding_errors="backslashreplace",
                skip_blank_lines=False,
                engine="python",
            )
            # With `axis=1` this drops every *column* that contains at
            # least one NaN.
            # NOTE(review): confirm this axis is intended, rather than
            # dropping rows that are entirely NaN.
            df.dropna(axis=1, inplace=True)
    except Exception as e:
        raise RuntimeError(
            f"`pandas.read_csv()` was not able to read the file. Error: {e}"
        ) from e

    return df

plot_df(self)

Source code in pydatalab/blocks/common.py
def plot_df(self):
    """Build a scatter plot of the loaded dataframe and store it as
    bokeh JSON under `self.data["bokeh_plot_data"]`.

    Nothing is stored when `self._load()` returns `None`.

    """
    import bokeh.embed

    from pydatalab.bokeh_plots import DATALAB_BOKEH_THEME, selectable_axes_plot

    frame = self._load()
    if frame is not None:
        # Points only (no connecting line), with the data table shown.
        plot_options = {"plot_points": True, "plot_line": False, "show_table": True}
        layout = selectable_axes_plot(frame, **plot_options)
        self.data["bokeh_plot_data"] = bokeh.embed.json_item(layout, theme=DATALAB_BOKEH_THEME)