
dataframe

Interface with dataframes

load_columns(release, datatype, dataset)

Loads the column list for the given release, datatype, and dataset.

Source code in src/sdss_explorer/server/dataframe.py
def load_columns(release: str, datatype: str, dataset: str):
    """Loads the column list for the given release, datatype, and dataset"""
    # Column metadata for every dataset lives in a single JSON file per
    # release and datatype; return only the entry for the requested dataset.
    with open(
            os.path.join(
                settings.datapath,
                release,
                f"columnsAll{datatype.capitalize()}-{settings.vastra}.json",
            )) as f:
        columns = json.load(f)
    return columns[dataset]
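
A minimal usage sketch: the release, datatype, and dataset values below are illustrative, and it assumes settings.datapath and settings.vastra are already configured so the JSON file exists on disk.

from sdss_explorer.server.dataframe import load_columns

# Reads <datapath>/<release>/columnsAll<Datatype>-<vastra>.json and returns
# the column list stored under the requested dataset key.
columns = load_columns(release="dr19", datatype="star", dataset="best")
print(len(columns), columns[:5])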

load_dataframe(release, datatype, dataset)

Loads the base dataframe and immediately applies the dataset filter to reduce memory usage.

Source code in src/sdss_explorer/server/dataframe.py
def load_dataframe(
        release: str, datatype: str,
        dataset: str) -> tuple[vx.DataFrame | None, list[str] | None]:
    """Loads the base dataframe and immediately applies the dataset filter to reduce memory usage"""
    dataroot_dir = settings.datapath
    if dataroot_dir:
        logger.debug("opening dataframe")
        cols = load_columns(release, datatype, dataset)
        # TODO: when we change the filegenerator, fix this here
        # Drop the flag bitmask columns and the pipeline column itself from
        # the user-selectable column list.
        validCols = [
            col for col in cols
            if ("_flags" not in col) and (col != "pipeline")
        ]
        df = vx.open(
            os.path.join(
                dataroot_dir,
                release,
                f"explorerAll{datatype.capitalize()}-{settings.vastra}.hdf5",
            ))
        # Keep only rows belonging to the requested dataset and materialize
        # the filtered frame so later operations touch less data.
        dff = df[df[f"pipeline == '{dataset}'"]].extract()
        logger.debug("loaded dataframe!")
        return dff, validCols
    else:
        logger.critical("Cannot load df!")
        return None, None
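
A hedged end-to-end sketch, assuming the HDF5 file explorerAll<Datatype>-<vastra>.hdf5 exists under settings.datapath for the requested release; the argument values are illustrative.

from sdss_explorer.server.dataframe import load_dataframe

df, valid_cols = load_dataframe(release="dr19", datatype="star", dataset="best")
if df is None:
    # load_dataframe returns (None, None) when settings.datapath is unset
    raise RuntimeError("datapath is not configured; cannot load the dataframe")

# df holds only rows whose pipeline column equals the requested dataset, and
# valid_cols excludes the *_flags columns and the pipeline column itself.
print(len(df), len(valid_cols))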