Skip to content



This is the main entry point for the pygen package. It contains the main functions for generating SDKs.

build_wheel(model_id, client=None, *, top_level_package=None, client_name=None, default_instance_space=None, output_dir=Path('dist'), format_code=True, config=None)

Generates a wheel with Python SDK tailored to the given Data Model(s).


Name Type Description Default
model_id DataModel | Sequence[DataModel]

The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s) directly to avoid fetching them from CDF.

client Optional[CogniteClient]

The cognite client used for fetching the data model. This is required if you pass in data model ID(s) in the model_id argument and not a data model.

top_level_package Optional[str]

The name of the top level package for the SDK. For example, if we have top_level_package=apm and client_name=APMClient, then the client is imported with from apm import APMClient. If nothing is passed, the package will be [external_id:snake] of the first data model given, while the client name will be [external_id:pascal_case].

client_name Optional[str]

The name of the client class. For example, APMClient. See above for more details.

default_instance_space str | None

The default instance space to use for the generated SDK. If not provided, the space must be specified when creating, deleting, and retrieving nodes and edges.

output_dir Path

The location to output the generated SDK wheel. Defaults to "dist".

format_code bool

Whether to format the generated code using black. Defaults to True.

config Optional[PygenConfig]

The configuration used to control how to generate the SDK.

Source code in cognite/pygen/
def build_wheel(
    model_id: DataModel | Sequence[DataModel],
    client: Optional[CogniteClient] = None,
    top_level_package: Optional[str] = None,
    client_name: Optional[str] = None,
    default_instance_space: str | None = None,
    output_dir: Path = Path("dist"),
    format_code: bool = True,
    config: Optional[PygenConfig] = None,
) -> None:
    Generates a wheel with Python SDK tailored to the given Data Model(s).

        model_id: The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s)
            directly to avoid fetching them from CDF.
        client: The cognite client used for fetching the data model. This is required if you pass in
            data models ID(s) in the `model_id` argument and not a data model.
        top_level_package: The name of the top level package for the SDK. For example,
            if we have top_level_package=`apm` and the client_name=`APMClient`, then
            the importing the client will be `from apm import APMClient`. If nothing is passed,
            the package will be [external_id:snake] of the first data model given, while
            the client name will be [external_id:pascal_case]
        client_name: The name of the client class. For example, `APMClient`. See above for more details.
        default_instance_space: The default instance space to use for the generated SDK. If not provided,
            the space must be specified when creating, deleting, and retrieving nodes and edges.
        output_dir: The location to output the generated SDK wheel. Defaults to "dist".
        format_code: Whether to format the generated code using black. Defaults to True.
        config: The configuration used to control how to generate the SDK.
        from build import ProjectBuilder  # type: ignore[import]
    except ImportError:
        raise ImportError(
            "'build' is required to build wheel. Install pygen with `pip install pygen[cli] or "
            "install build directly `pip install build`."
        ) from None

    data_model = _get_data_model(model_id, client, print)
    folder_name = _create_folder_name(
        data_model.as_id() if isinstance(data_model, dm.DataModel) else data_model.as_ids()
    build_dir = Path(tempfile.gettempdir()) / "pygen_build" / folder_name
    if build_dir.exists():
        except Exception as e:
            print(f"Failed to clean temporary build directory {build_dir}: {e}")

    external_id = _extract_external_id(data_model)
    if top_level_package is None:
        top_level_package = _default_top_level_package(external_id)
    if client_name is None:
        client_name = _default_client_name(external_id)
        output_dir=build_dir / _top_level_to_path(top_level_package),

    generate_pyproject_toml(build_dir, top_level_package)

    output_dir.mkdir(exist_ok=True, parents=True)
    ProjectBuilder(build_dir).build(distribution="wheel", output_directory=str(output_dir))

    print(f"Generated SDK wheel at {output_dir}")

generate_sdk(model_id, client=None, top_level_package=None, client_name=None, default_instance_space=None, output_dir=None, logger=None, overwrite=False, format_code=False, config=None, return_sdk_files=False)

generate_sdk(model_id: DataModel | Sequence[DataModel], client: Optional[CogniteClient] = None, top_level_package: Optional[str] = None, client_name: Optional[str] = None, default_instance_space: str | None = None, output_dir: Optional[Path] = None, logger: Optional[Callable[[str], None]] = None, overwrite: bool = False, format_code: bool = False, config: Optional[PygenConfig] = None, return_sdk_files: Literal[False] = False) -> None
generate_sdk(model_id: DataModel | Sequence[DataModel], client: Optional[CogniteClient] = None, top_level_package: Optional[str] = None, client_name: Optional[str] = None, default_instance_space: str | None = None, output_dir: Optional[Path] = None, logger: Optional[Callable[[str], None]] = None, overwrite: bool = False, format_code: bool = False, config: Optional[PygenConfig] = None, return_sdk_files: Literal[True] = False) -> dict[Path, str]

Generates a Python SDK tailored to the given Data Model(s).


Name Type Description Default
model_id DataModel | Sequence[DataModel]

The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s) directly to avoid fetching them from CDF.

client Optional[CogniteClient]

The cognite client used for fetching the data model. This is required if you pass in data model ID(s) in the model_id argument and not a data model.

top_level_package Optional[str]

The name of the top level package for the SDK. For example, if we have top_level_package=apm and client_name=APMClient, then the client is imported with from apm import APMClient. If nothing is passed, the package will be [external_id:snake] of the first data model given, while the client name will be [external_id:pascal_case]. In case pygen is part of another package, the top level package should be the full package name, for example, cognite.apm.

client_name Optional[str]

The name of the client class. For example, APMClient. See above for more details.

default_instance_space str | None

The default instance space to use for the generated SDK. If not provided, the space must be specified when creating, deleting, and retrieving nodes and edges.

output_dir Optional[Path]

The location to output the generated SDK. Defaults to Path.cwd() / Path(top_level_package.replace(".", "/")).

logger Optional[Callable[[str], None]]

A logger function to log progress. Defaults to print.

overwrite bool

Whether to overwrite the output directory if it already exists. Defaults to False.

format_code bool

Whether to format the generated code using black. Defaults to False.

config Optional[PygenConfig]

The configuration used to control how to generate the SDK.

return_sdk_files bool

Whether to return the generated SDK files as a dictionary. Defaults to False. This is useful for granular control of how to write the SDK to disk.

Source code in cognite/pygen/
def generate_sdk(
    model_id: DataModel | Sequence[DataModel],
    client: Optional[CogniteClient] = None,
    top_level_package: Optional[str] = None,
    client_name: Optional[str] = None,
    default_instance_space: str | None = None,
    output_dir: Optional[Path] = None,
    logger: Optional[Callable[[str], None]] = None,
    overwrite: bool = False,
    format_code: bool = False,
    config: Optional[PygenConfig] = None,
    return_sdk_files: bool = False,
) -> None | dict[Path, str]:
    Generates a Python SDK tailored to the given Data Model(s).

        model_id: The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s)
            directly to avoid fetching them from CDF.
        client: The cognite client used for fetching the data model. This is required if you pass in
            data models ID(s) in the `model_id` argument and not a data model.
        top_level_package: The name of the top level package for the SDK. For example,
            if we have top_level_package=`apm` and the client_name=`APMClient`, then
            the importing the client will be `from apm import APMClient`. If nothing is passed,
            the package will be [external_id:snake] of the first data model given, while
            the client name will be [external_id:pascal_case]. In the case, pygen is part of another package,
            the top level package should be the full package name. For example, `cognite.apm`.
        client_name: The name of the client class. For example, `APMClient`. See above for more details.
        default_instance_space: The default instance space to use for the generated SDK. If not provided,
            the space must be specified when creating, deleting, and retrieving nodes and edges.
        output_dir: The location to output the generated SDK.
            Defaults: Path.cwd() / Path(top_level_package.replace(".", "/")).
        logger: A logger function to log progress. Defaults to print.
        overwrite: Whether to overwrite the output directory if it already exists. Defaults to False.
        format_code: Whether to format the generated code using black. Defaults to False.
        config: The configuration used to control how to generate the SDK.
        return_sdk_files: Whether to return the generated SDK files as a dictionary. Defaults to False.
            This is useful for granular control of how to write the SDK to disk.
    return _generate_sdk(

generate_sdk_notebook(model_id, client=None, top_level_package=None, client_name=None, default_instance_space=None, config=None, clean_pygen_temp_dir=True)

Generates a Python SDK tailored to the given Data Model(s) and imports it into the current Python session.

This function is a wrapper around generate_sdk. It is intended to be used in a Jupyter notebook. It differs from generate_sdk in the following ways:

  • The SDK is generated in a temporary directory and added to the sys.path. This is such that it becomes available to be imported in the current Python session.
  • The signature is simplified.
  • An instantiated client of the generated SDK is returned.


Name Type Description Default
model_id DataModel | Sequence[DataModel]

The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s) directly to avoid fetching them from CDF.

client Optional[CogniteClient]

The cognite client used for fetching the data model. This is required if you pass in data model ID(s) in the model_id argument and not a data model.

top_level_package Optional[str]

The name of the top level package for the SDK. For example, if we have top_level_package=apm and client_name=APMClient, then the client is imported with from apm import APMClient. If nothing is passed, the package will be [external_id:snake] of the first data model given, while the client name will be [external_id:pascal_case].

client_name Optional[str]

The name of the client class. For example, APMClient. See above for more details.

default_instance_space str | None

The default instance space to use for the generated SDK. If not provided, the space must be specified when creating, deleting, and retrieving nodes and edges.

config Optional[PygenConfig]

The configuration used to control how to generate the SDK.

clean_pygen_temp_dir bool

Whether to clean the temporary directory used to store the generated SDK. Defaults to True.



Type Description

The instantiated generated client class.

Source code in cognite/pygen/
def generate_sdk_notebook(
    model_id: DataModel | Sequence[DataModel],
    client: Optional[CogniteClient] = None,
    top_level_package: Optional[str] = None,
    client_name: Optional[str] = None,
    default_instance_space: str | None = None,
    config: Optional[PygenConfig] = None,
    clean_pygen_temp_dir: bool = True,
) -> Any:
    Generates a Python SDK tailored to the given Data Model(s) and imports it into the current Python session.

    This function is wrapper around generate_sdk. It is intended to be used in a Jupyter notebook.
    The differences are that it:

    * The SDK is generated in a temporary directory and added to the sys.path. This is such that it
      becomes available to be imported in the current Python session.
    * The signature is simplified.
    * An instantiated client of the generated SDK is returned.

        model_id: The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s)
            directly to avoid fetching them from CDF.
        client: The cognite client used for fetching the data model. This is required if you pass in
            data models ID(s) in the `model_id` argument and not a data model.
        top_level_package: The name of the top level package for the SDK. For example,
            if we have top_level_package=`apm` and the client_name=`APMClient`, then
            the importing the client will be `from apm import APMClient`. If nothing is passed,
            the package will be [external_id:snake] of the first data model given, while
            the client name will be [external_id:pascal_case]
        client_name: The name of the client class. For example, `APMClient`. See above for more details.
        default_instance_space: The default instance space to use for the generated SDK. If not provided,
            the space must be specified when creating, deleting, and retrieving nodes and edges.
        config: The configuration used to control how to generate the SDK.
        clean_pygen_temp_dir: Whether to clean the temporary directory used to store the generated SDK.
            Defaults to True.

        The instantiated generated client class.
    data_model = _get_data_model(model_id, client, print)
    folder_name = _create_folder_name(
        data_model.as_id() if isinstance(data_model, dm.DataModel) else data_model.as_ids()
    output_dir = Path(tempfile.gettempdir()) / "pygen" / folder_name
    if clean_pygen_temp_dir and output_dir.exists():
        except Exception as e:
            print(f"Failed to clean temporary directory {output_dir}: {e}")
            print(f"Cleaned temporary directory {output_dir}")

    external_id = _extract_external_id(data_model)
    if top_level_package is None:
        top_level_package = _default_top_level_package(external_id)
    if client_name is None:
        client_name = _default_client_name(external_id)
        output_dir=output_dir / _top_level_to_path(top_level_package),
    if str(output_dir) not in sys.path:
        print(f"Added {output_dir} to sys.path to enable import")
        print(f"{output_dir} already in sys.path")
    module = vars(importlib.import_module(top_level_package))
    print(f"Imported {top_level_package}")
    print("You can now use the generated SDK in the current Python session.")
    if isinstance(data_model, dm.DataModel):
        view = data_model.views[0]
    elif isinstance(data_model, Sequence):
        view = data_model[0].views[0]
        view = None

    if view:
            "The data classes are available by importing, for example, "
            f"`from {top_level_package}.data_classes import {DataClass.to_base_name(view)}Write`"
    return module[client_name](client)

load_cognite_client_from_toml(toml_file='config.toml', section='cognite')

This is a small helper function to load a CogniteClient from a toml file.

The default name of the config file is "config.toml" and it should look like this:

[cognite]
project = "<cdf-project>"
tenant_id = "<tenant-id>"
cdf_cluster = "<cdf-cluster>"
client_id = "<client-id>"
client_secret = "<client-secret>"


Name Type Description Default
toml_file Path | str

Path to toml file

section str | None

Name of the section in the toml file to use. If None, use the top level of the toml file. Defaults to "cognite".



Type Description

A CogniteClient with configurations from the toml file.

Source code in cognite/pygen/utils/
def load_cognite_client_from_toml(
    toml_file: Path | str = "config.toml", section: str | None = "cognite"
) -> CogniteClient:
    """
    This is a small helper function to load a CogniteClient from a toml file.

    The default name of the config file is "config.toml" and, with the default
    section, it should look like this:

    ```toml
    [cognite]
    project = "<cdf-project>"
    tenant_id = "<tenant-id>"
    cdf_cluster = "<cdf-cluster>"
    client_id = "<client-id>"
    client_secret = "<client-secret>"
    ```

    An optional `login_flow` key is removed from the settings before they are
    forwarded as keyword arguments. `login_flow = "interactive"` selects
    `CogniteClient.default_oauth_interactive`; any other value, or no
    `login_flow` key at all, selects
    `CogniteClient.default_oauth_client_credentials`.

    Args:
        toml_file: Path to toml file
        section: Name of the section in the toml file to use. If None, use the top level of the toml file.
                 Defaults to "cognite".

    Returns:
        A CogniteClient with configurations from the toml file.

    Raises:
        KeyError: If `section` is given but is not a top-level key of the parsed toml content.
    """
    # Imported lazily so that `toml` is only needed when this helper is actually called.
    import toml

    toml_content = toml.load(toml_file)
    if section is not None:
        toml_content = toml_content[section]

    # `login_flow` only selects the constructor; it must not be forwarded as a keyword argument.
    login_flow = toml_content.pop("login_flow", None)
    if login_flow == "interactive":
        return CogniteClient.default_oauth_interactive(**toml_content)
    return CogniteClient.default_oauth_client_credentials(**toml_content)