Skip to content

dataframe

Interface with dataframe

load_columns(release, datatype, dataset) #

Loads the column entry for the given release, datatype, and dataset

Source code in src/sdss_explorer/server/dataframe.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def load_columns(release: str, datatype: str, dataset: str):
    """Return the column entry recorded for ``dataset``.

    Reads ``columnsAll<Datatype>-<vastra>.json`` from the ``release``
    directory under ``settings.datapath`` and returns the value stored
    under the ``dataset`` key of that JSON document.

    Args:
        release: Release name; used both as the sub-directory and as the
            input to ``resolve_vastra``.
        datatype: Datatype name; capitalized to build the file name.
        dataset: Key to look up in the parsed JSON document.

    Returns:
        Whatever the JSON file stores under ``dataset``.

    Raises:
        KeyError: If ``dataset`` is not a key of the JSON document.
    """
    vastra = resolve_vastra(release)
    columns_path = os.path.join(
        settings.datapath,
        release,
        f"columnsAll{datatype.capitalize()}-{vastra}.json",
    )
    with open(columns_path, "r", encoding="utf-8") as handle:
        per_dataset = json.load(handle)
    return per_dataset[dataset]

load_dataframe(release, datatype, dataset) #

Loads base dataframe and applies dataset filter IMMEDIATELY to reduce memory usage

Source code in src/sdss_explorer/server/dataframe.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def load_dataframe(
    release: str, datatype: str, dataset: str
) -> tuple[vx.DataFrame | None, list[str] | None]:
    """Open the base dataframe and narrow it to one dataset right away.

    The ``explorerAll<Datatype>-<vastra>.hdf5`` file in the ``release``
    directory under ``settings.datapath`` is opened, filtered on
    ``pipeline == '<dataset>'`` and extracted, so callers only ever hold
    the filtered frame.

    Args:
        release: Release name; sub-directory and input to ``resolve_vastra``.
        datatype: Datatype name; capitalized to build the file name.
        dataset: Value the ``pipeline`` column is matched against.

    Returns:
        ``(dataframe, columns)`` where ``columns`` is the result of
        ``load_columns`` minus any name containing ``"_flags"`` and minus
        ``"pipeline"``; ``(None, None)`` when ``settings.datapath`` is falsy.
    """
    dataroot_dir = settings.datapath
    if not dataroot_dir:
        # No data root configured: report it and hand back the empty pair.
        logger.critical("Cannot load df!")
        return None, None

    logger.debug("opening dataframe")
    vastra = resolve_vastra(release)
    all_columns = load_columns(release, datatype, dataset)
    # TODO: when we change the filegenerator, fix this here
    usable_columns = [
        name
        for name in all_columns
        if not ("_flags" in name or name == "pipeline")
    ]

    hdf5_path = os.path.join(
        dataroot_dir,
        release,
        f"explorerAll{datatype.capitalize()}-{vastra}.hdf5",
    )
    full_frame = vx.open(hdf5_path)
    # Filter on the pipeline column, then extract the filtered rows.
    in_dataset = full_frame[f"pipeline == '{dataset}'"]
    filtered_frame = full_frame[in_dataset].extract()
    logger.debug("loaded dataframe!")
    return filtered_frame, usable_columns

load_mappings(release) cached #

Loads release-aware mappings parquet with backward-compatible fallbacks.

Source code in src/sdss_explorer/server/dataframe.py
16
17
18
19
20
21
22
23
24
25
26
27
28
@lru_cache
def load_mappings(release: str):
    """Open the mappings parquet for ``release``, trying fallbacks in order.

    Lookup order under ``settings.datapath``:

    1. ``<release>/mappings_<release>.parquet``
    2. ``mappings_<release>.parquet``
    3. ``mappings.parquet`` (opened without an existence check)

    Args:
        release: Release name; a falsy value is treated as ``"dr19"`` and
            the name is lower-cased before building paths.

    Returns:
        The object returned by ``vx.open`` for the first matching path.
    """
    release = (release or "dr19").lower()
    versioned_name = f"mappings_{release}.parquet"
    candidates = (
        os.path.join(settings.datapath, release, versioned_name),
        os.path.join(settings.datapath, versioned_name),
    )
    for candidate in candidates:
        if os.path.exists(candidate):
            return vx.open(candidate)

    # Legacy un-versioned file is the last resort.
    return vx.open(os.path.join(settings.datapath, "mappings.parquet"))