Skip to content

Pygen

cognite.pygen

This is the main entry point for the pygen package. It contains the main functions for generating SDKs.

build_wheel(model_id, client=None, *, top_level_package=None, client_name=None, default_instance_space=None, output_dir=Path('dist'), format_code=True, config=None)

Generates a wheel with Python SDK tailored to the given Data Model(s).

Parameters:

Name Type Description Default
model_id DataModel | Sequence[DataModel]

The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s) directly to avoid fetching them from CDF.

required
client Optional[CogniteClient]

The Cognite client used for fetching the data model. This is required if you pass data model ID(s) in the model_id argument rather than the data model(s) themselves.

None
top_level_package Optional[str]

The name of the top level package for the SDK. For example, if we have top_level_package=apm and client_name=APMClient, then importing the client will be from apm import APMClient. If nothing is passed, the package will be [external_id:snake] of the first data model given, while the client name will be [external_id:pascal_case].

None
client_name Optional[str]

The name of the client class. For example, APMClient. See above for more details.

None
default_instance_space str | None

The default instance space to use for the generated SDK. If not provided, the space must be specified when creating, deleting, and retrieving nodes and edges.

None
output_dir Path

The location to output the generated SDK wheel. Defaults to "dist".

Path('dist')
format_code bool

Whether to format the generated code using black. Defaults to True.

True
config Optional[PygenConfig]

The configuration used to control how to generate the SDK.

None
Source code in cognite/pygen/_build.py
def build_wheel(
    model_id: DataModel | Sequence[DataModel],
    client: Optional[CogniteClient] = None,
    *,
    top_level_package: Optional[str] = None,
    client_name: Optional[str] = None,
    default_instance_space: str | None = None,
    output_dir: Path = Path("dist"),
    format_code: bool = True,
    config: Optional[PygenConfig] = None,
) -> None:
    """
    Generates a wheel with Python SDK tailored to the given Data Model(s).

    The SDK is first generated into a build directory below the system temporary
    directory (``<tmp>/pygen_build/<folder name>``), a ``pyproject.toml`` is written next
    to it, and the wheel is then built into ``output_dir``.

    Args:
        model_id: The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s)
            directly to avoid fetching them from CDF.
        client: The cognite client used for fetching the data model. This is required if you pass
            data model ID(s) in the `model_id` argument rather than the data model(s) themselves.
        top_level_package: The name of the top level package for the SDK. For example,
            if we have top_level_package=`apm` and client_name=`APMClient`, then
            importing the client will be `from apm import APMClient`. If nothing is passed,
            the package will be [external_id:snake] of the first data model given, while
            the client name will be [external_id:pascal_case].
        client_name: The name of the client class. For example, `APMClient`. See above for more details.
        default_instance_space: The default instance space to use for the generated SDK. If not provided,
            the space must be specified when creating, deleting, and retrieving nodes and edges.
        output_dir: The location to output the generated SDK wheel. Defaults to "dist".
        format_code: Whether to format the generated code using black. Defaults to True.
        config: The configuration used to control how to generate the SDK.

    Raises:
        ImportError: If the optional `build` package is not installed.
    """
    # `build` is an optional dependency, so it is imported lazily and a missing
    # installation is reported with an actionable message.
    try:
        from build import ProjectBuilder  # type: ignore[import]
    except ImportError:
        raise ImportError(
            "'build' is required to build wheel. Install pygen with `pip install pygen[cli]` or "
            "install build directly `pip install build`."
        ) from None

    data_model = _get_data_model(model_id, client, print)
    folder_name = _create_folder_name(
        data_model.as_id() if isinstance(data_model, dm.DataModel) else data_model.as_ids()
    )
    build_dir = Path(tempfile.gettempdir()) / "pygen_build" / folder_name
    if build_dir.exists():
        # Best effort: a leftover directory from an earlier run is removed if possible,
        # but a failure to do so is only reported and does not abort the build.
        try:
            shutil.rmtree(build_dir)
        except Exception as e:
            print(f"Failed to clean temporary build directory {build_dir}: {e}")

    # Fall back to names derived from the data model's external id.
    external_id = _extract_external_id(data_model)
    if top_level_package is None:
        top_level_package = _default_top_level_package(external_id)
    if client_name is None:
        client_name = _default_client_name(external_id)

    # The already resolved data model is passed on instead of the original `model_id`.
    generate_sdk(
        data_model,
        client,
        top_level_package=top_level_package,
        client_name=client_name,
        default_instance_space=default_instance_space,
        output_dir=build_dir / _top_level_to_path(top_level_package),
        overwrite=True,
        format_code=format_code,
        config=config,
    )

    generate_pyproject_toml(build_dir, top_level_package)

    output_dir.mkdir(exist_ok=True, parents=True)
    ProjectBuilder(build_dir).build(distribution="wheel", output_directory=str(output_dir))

    print(f"Generated SDK wheel at {output_dir}")

generate_sdk(model_id, client=None, top_level_package=None, client_name=None, default_instance_space=None, output_dir=None, logger=None, overwrite=False, format_code=True, config=None, return_sdk_files=False)

generate_sdk(model_id: DataModel | Sequence[DataModel], client: Optional[CogniteClient] = None, top_level_package: Optional[str] = None, client_name: Optional[str] = None, default_instance_space: str | None = None, output_dir: Optional[Path] = None, logger: Optional[Callable[[str], None]] = None, overwrite: bool = False, format_code: bool = True, config: Optional[PygenConfig] = None, return_sdk_files: Literal[False] = False) -> None
generate_sdk(model_id: DataModel | Sequence[DataModel], client: Optional[CogniteClient] = None, top_level_package: Optional[str] = None, client_name: Optional[str] = None, default_instance_space: str | None = None, output_dir: Optional[Path] = None, logger: Optional[Callable[[str], None]] = None, overwrite: bool = False, format_code: bool = True, config: Optional[PygenConfig] = None, return_sdk_files: Literal[True] = False) -> dict[Path, str]

Generates a Python SDK tailored to the given Data Model(s).

Parameters:

Name Type Description Default
model_id DataModel | Sequence[DataModel]

The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s) directly to avoid fetching them from CDF.

required
client Optional[CogniteClient]

The Cognite client used for fetching the data model. This is required if you pass data model ID(s) in the model_id argument rather than the data model(s) themselves.

None
top_level_package Optional[str]

The name of the top level package for the SDK. For example, if we have top_level_package=apm and client_name=APMClient, then importing the client will be from apm import APMClient. If nothing is passed, the package will be [external_id:snake] of the first data model given, while the client name will be [external_id:pascal_case]. In case pygen is part of another package, the top level package should be the full package name, for example, cognite.apm.

None
client_name Optional[str]

The name of the client class. For example, APMClient. See above for more details.

None
default_instance_space str | None

The default instance space to use for the generated SDK. If not provided, the space must be specified when creating, deleting, and retrieving nodes and edges.

None
output_dir Optional[Path]

The location to output the generated SDK. Defaults: Path.cwd() / Path(top_level_package.replace(".", "/")).

None
logger Optional[Callable[[str], None]]

A logger function to log progress. Defaults to print.

None
overwrite bool

Whether to overwrite the output directory if it already exists. Defaults to False.

False
format_code bool

Whether to format the generated code using black. Defaults to True.

True
config Optional[PygenConfig]

The configuration used to control how to generate the SDK.

None
return_sdk_files bool

Whether to return the generated SDK files as a dictionary. Defaults to False. This is useful for granular control of how to write the SDK to disk.

False
Source code in cognite/pygen/_generator.py
def generate_sdk(
    model_id: DataModel | Sequence[DataModel],
    client: Optional[CogniteClient] = None,
    top_level_package: Optional[str] = None,
    client_name: Optional[str] = None,
    default_instance_space: str | None = None,
    output_dir: Optional[Path] = None,
    logger: Optional[Callable[[str], None]] = None,
    overwrite: bool = False,
    format_code: bool = True,
    config: Optional[PygenConfig] = None,
    return_sdk_files: bool = False,
) -> None | dict[Path, str]:
    """
    Generates a Python SDK tailored to the given Data Model(s).

    This is the public entry point; all arguments are forwarded unchanged to the
    internal `_generate_sdk` and its result is returned as is.

    Args:
        model_id: The ID(s) of the data model(s) used to create a tailored SDK. The data model(s)
            themselves can also be passed in directly to avoid fetching them from CDF.
        client: The cognite client used for fetching the data model. Required when `model_id`
            contains data model ID(s) rather than the data model(s) themselves.
        top_level_package: The name of the top level package for the SDK. For example,
            with top_level_package=`apm` and client_name=`APMClient`, the client is
            imported with `from apm import APMClient`. If nothing is passed,
            the package will be [external_id:snake] of the first data model given, while
            the client name will be [external_id:pascal_case]. In case pygen is part of another package,
            the top level package should be the full package name, for example, `cognite.apm`.
        client_name: The name of the client class. For example, `APMClient`. See above for more details.
        default_instance_space: The default instance space to use for the generated SDK. If not provided,
            the space must be specified when creating, deleting, and retrieving nodes and edges.
        output_dir: The location to output the generated SDK.
            Defaults: Path.cwd() / Path(top_level_package.replace(".", "/")).
        logger: A logger function to log progress. Defaults to print.
        overwrite: Whether to overwrite the output directory if it already exists. Defaults to False.
        format_code: Whether to format the generated code using black. Defaults to True.
        config: The configuration used to control how to generate the SDK.
        return_sdk_files: Whether to return the generated SDK files as a dictionary. Defaults to False.
            This is useful for granular control of how to write the SDK to disk.

    Returns:
        Whatever `_generate_sdk` returns: per the annotation, either None or a dictionary
        of the generated SDK files.
    """
    # Arguments are passed positionally, in declaration order, exactly as received.
    sdk_files = _generate_sdk(
        model_id,
        client,
        top_level_package,
        client_name,
        default_instance_space,
        output_dir,
        logger,
        overwrite,
        format_code,
        config,
        return_sdk_files,
    )
    return sdk_files

generate_sdk_notebook(model_id, client=None, top_level_package=None, client_name=None, default_instance_space=None, config=None, clean_pygen_temp_dir=True)

Generates a Python SDK tailored to the given Data Model(s) and imports it into the current Python session.

This function is a wrapper around generate_sdk. It is intended to be used in a Jupyter notebook. It differs from generate_sdk in the following ways:

  • The SDK is generated in a temporary directory and added to the sys.path. This is such that it becomes available to be imported in the current Python session.
  • The signature is simplified.
  • An instantiated client of the generated SDK is returned.

Parameters:

Name Type Description Default
model_id DataModel | Sequence[DataModel]

The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s) directly to avoid fetching them from CDF.

required
client Optional[CogniteClient]

The Cognite client used for fetching the data model. This is required if you pass data model ID(s) in the model_id argument rather than the data model(s) themselves.

None
top_level_package Optional[str]

The name of the top level package for the SDK. For example, if we have top_level_package=apm and client_name=APMClient, then importing the client will be from apm import APMClient. If nothing is passed, the package will be [external_id:snake] of the first data model given, while the client name will be [external_id:pascal_case].

None
client_name Optional[str]

The name of the client class. For example, APMClient. See above for more details.

None
default_instance_space str | None

The default instance space to use for the generated SDK. If not provided, the space must be specified when creating, deleting, and retrieving nodes and edges.

None
config Optional[PygenConfig]

The configuration used to control how to generate the SDK.

None
clean_pygen_temp_dir bool

Whether to clean the temporary directory used to store the generated SDK. Defaults to True.

True

Returns:

Type Description
Any

The instantiated generated client class.

Source code in cognite/pygen/_generator.py
def generate_sdk_notebook(
    model_id: DataModel | Sequence[DataModel],
    client: Optional[CogniteClient] = None,
    top_level_package: Optional[str] = None,
    client_name: Optional[str] = None,
    default_instance_space: str | None = None,
    config: Optional[PygenConfig] = None,
    clean_pygen_temp_dir: bool = True,
) -> Any:
    """
    Generates a Python SDK tailored to the given Data Model(s) and imports it into the current Python session.

    This function is a wrapper around generate_sdk. It is intended to be used in a Jupyter notebook.
    It differs from generate_sdk in the following ways:

    * The SDK is generated in a temporary directory and added to the sys.path. This is such that it
      becomes available to be imported in the current Python session.
    * The signature is simplified.
    * An instantiated client of the generated SDK is returned.


    Args:
        model_id: The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s)
            directly to avoid fetching them from CDF.
        client: The cognite client used for fetching the data model. This is required if you pass
            data model ID(s) in the `model_id` argument rather than the data model(s) themselves.
        top_level_package: The name of the top level package for the SDK. For example,
            if we have top_level_package=`apm` and client_name=`APMClient`, then
            importing the client will be `from apm import APMClient`. If nothing is passed,
            the package will be [external_id:snake] of the first data model given, while
            the client name will be [external_id:pascal_case].
        client_name: The name of the client class. For example, `APMClient`. See above for more details.
        default_instance_space: The default instance space to use for the generated SDK. If not provided,
            the space must be specified when creating, deleting, and retrieving nodes and edges.
        config: The configuration used to control how to generate the SDK.
        clean_pygen_temp_dir: Whether to clean the temporary directory used to store the generated SDK.
            Defaults to True.

    Returns:
        The instantiated generated client class.
    """
    data_model = _get_data_model(model_id, client, print)
    folder_name = _create_folder_name(
        data_model.as_id() if isinstance(data_model, dm.DataModel) else data_model.as_ids()
    )
    output_dir = Path(tempfile.gettempdir()) / "pygen" / folder_name
    if clean_pygen_temp_dir and output_dir.exists():
        # Best effort: failing to remove a previous SDK is reported but not fatal.
        try:
            shutil.rmtree(output_dir)
        except Exception as e:
            print(f"Failed to clean temporary directory {output_dir}: {e}")
        else:
            print(f"Cleaned temporary directory {output_dir}")

    # Fall back to names derived from the data model's external id.
    external_id = _extract_external_id(data_model)
    if top_level_package is None:
        top_level_package = _default_top_level_package(external_id)
    if client_name is None:
        client_name = _default_client_name(external_id)
    _generate_sdk(
        data_model,
        client,
        top_level_package=top_level_package,
        client_name=client_name,
        default_instance_space=default_instance_space,
        output_dir=output_dir / _top_level_to_path(top_level_package),
        overwrite=True,
        format_code=False,
        config=config,
        context="notebook",
    )

    # The parent of the generated package must be on sys.path for the import below to work.
    if str(output_dir) not in sys.path:
        sys.path.append(str(output_dir))
        print(f"Added {output_dir} to sys.path to enable import")
    else:
        print(f"{output_dir} already in sys.path")
    module = vars(importlib.import_module(top_level_package))
    print(f"Imported {top_level_package}")
    print("You can now use the generated SDK in the current Python session.")

    # Pick a view to use in the example import hint. The hint is purely informational,
    # so an empty sequence or a data model without views must not raise here; the
    # hint is simply skipped in that case.
    if isinstance(data_model, dm.DataModel):
        views = data_model.views
    elif isinstance(data_model, Sequence) and data_model:
        views = data_model[0].views
    else:
        views = None
    view = views[0] if views else None

    if view:
        print(
            "The data classes are available by importing, for example, "
            f"`from {top_level_package}.data_classes import {DataClass.to_base_name(view)}Write`"
        )
    return module[client_name](client)

load_cognite_client_from_toml(toml_file='config.toml', section='cognite')

This is a small helper function to load a CogniteClient from a toml file.

The default name of the config file is "config.toml" and it should look like this:

[cognite]
project = "<cdf-project>"
tenant_id = "<tenant-id>"
cdf_cluster = "<cdf-cluster>"
client_id = "<client-id>"
client_secret = "<client-secret>"

Parameters:

Name Type Description Default
toml_file Path | str

Path to toml file

'config.toml'
section str | None

Name of the section in the toml file to use. If None, use the top level of the toml file. Defaults to "cognite".

'cognite'

Returns:

Type Description
CogniteClient

A CogniteClient with configurations from the toml file.

Source code in cognite/pygen/utils/cdf.py
def load_cognite_client_from_toml(
    toml_file: Path | str = "config.toml", section: str | None = "cognite"
) -> CogniteClient:
    """
    Create a CogniteClient from the settings stored in a toml file.

    By default the file is called "config.toml" and is expected to look like this:

    ```toml
    [cognite]
    project = "<cdf-project>"
    tenant_id = "<tenant-id>"
    cdf_cluster = "<cdf-cluster>"
    client_id = "<client-id>"
    client_secret = "<client-secret>"
    ```

    An optional `login_flow` key selects the authentication flow: the value
    "interactive" uses `CogniteClient.default_oauth_interactive`, anything else
    (including a missing key) uses `CogniteClient.default_oauth_client_credentials`.
    All remaining keys are passed on as keyword arguments.

    Args:
        toml_file: Path to toml file
        section: Name of the section in the toml file to use. If None, use the top level of the toml file.
                 Defaults to "cognite".

    Returns:
        A CogniteClient with configurations from the toml file.
    """
    import toml

    parsed = toml.load(toml_file)
    settings = parsed if section is None else parsed[section]

    # `login_flow` only selects the factory and is removed before the remaining
    # settings are forwarded as keyword arguments.
    if settings.pop("login_flow", None) == "interactive":
        return CogniteClient.default_oauth_interactive(**settings)
    return CogniteClient.default_oauth_client_credentials(**settings)