import fsspec
import numpy as np
import pandas as pd
from kedro.io.core import get_protocol_and_path
def upload_bcf_as_png(
    bcf_file: fsspec.core.OpenFile, file_sizes: np.ndarray, output_path: str
) -> None:
    """
    Extracts the files packed in a .bcf file and stores them as .png files
    in `output_path`.

    The i-th entry of `file_sizes` is the byte length of the i-th packed
    file. The files are read back to back from `bcf_file`, starting right
    after the header, and written to `{output_path}/{i}.png` through the
    fsspec filesystem matching the protocol of `output_path`.

    Args:
        bcf_file (fsspec.core.OpenFile): File descriptor to the .bcf file.
            Must support `seek` and `read`.
        file_sizes (np.ndarray): File sizes read in `read_bcf_metadata` node.
        output_path (str): Path where the .png files are stored
    """
    # Convert to plain Python ints up front. Mixing Python ints with NumPy
    # unsigned scalars (or appending a uint64 zero to a signed array) can
    # promote the offsets to float64, which then reach `seek`/`read` as
    # floats.
    sizes = [int(size) for size in np.asarray(file_sizes).ravel()]

    # The payload starts after (number of files + 1) 8-byte words.
    # NOTE(review): presumably one word for the file count plus one size
    # word per file -- confirm against `read_bcf_metadata`.
    position = (len(sizes) + 1) * 8

    protocol, _ = get_protocol_and_path(output_path)
    _fs = fsspec.filesystem(protocol)

    for index, size in enumerate(sizes):
        bcf_file.seek(position)
        image_bytes = bcf_file.read(size)
        with _fs.open(f"{output_path}/{index}.png", "wb") as png_file:
            png_file.write(image_bytes)
        # The next packed file starts right where this one ended.
        position += size
def read_labels(label_file: fsspec.core.OpenFile) -> pd.DataFrame:
    """
    Reads the labels saved under `label_file` and returns them as a
    dataframe.

    The whole file is read at once and its bytes are interpreted as a flat
    sequence of unsigned 32-bit integers.

    Args:
        label_file (fsspec.core.OpenFile): File descriptor to the .label file

    Returns:
        pd.DataFrame: Dataframe with a single `labels` column holding one
            row per decoded integer
    """
    raw_bytes = label_file.read()
    # `np.frombuffer` requires the byte count to be a multiple of 4 here.
    decoded = np.frombuffer(raw_bytes, dtype=np.uint32)
    return pd.DataFrame(decoded, columns=["labels"])
def upload_labels_as_csv(
    df_labels: pd.DataFrame, output_path: str
):  # TODO: use fsspec here
    """
    Writes the passed `df_labels` to `labels.csv` inside `output_path`.

    The dataframe is serialised with the pandas `to_csv` defaults, so the
    index is written as the first column.

    Args:
        df_labels (pd.DataFrame): labels
        output_path (str): Path where the labels.csv file is stored
    """
    target = f"{output_path}/labels.csv"
    df_labels.to_csv(target)