Pygen

cognite.pygen

This is the main entry point for the pygen package. It contains the main functions for generating SDKs.

_QueryExecutor

Class for executing queries against the Domain Model Storage (DMS) endpoints in CDF.

Parameters:

| Name   | Type           | Description                                                                                                    | Default  |
|--------|----------------|----------------------------------------------------------------------------------------------------------------|----------|
| client | CogniteClient  | An instance of the CogniteClient.                                                                                | required |
| views  | Sequence[View] | A list of views to use for the queries. If not passed, the views will be fetched from the server when needed.   | None     |
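
A minimal usage sketch. The import path for `_QueryExecutor` and the view ID below are assumptions for illustration:

```python
from cognite.client import CogniteClient
from cognite.client import data_modeling as dm

from cognite.pygen import _QueryExecutor  # assumed import path

client = CogniteClient()  # assumes a default client configuration is already set up
executor = _QueryExecutor(client)

# Hypothetical view; replace with a view from your own data model.
view_id = dm.ViewId(space="my_space", external_id="Pump", version="v1")
pumps = executor.list(view_id, properties=["name"], limit=5)
```
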
Source code in cognite/pygen/_query/interface.py
class QueryExecutor:
    """Class for executing queries against the Domain Model Storage (DMS) endpoints in CDF.

    Args:
        client (CogniteClient): An instance of the CogniteClient.
        views (Sequence[dm.View], optional): A list of views to use for the queries. Defaults to None.
            If not passed, the views will be fetched from the server when needed.
    """

    def __init__(self, client: CogniteClient, views: Sequence[dm.View] | None = None):
        self._client = client
        # Used for aggregated logging of requests
        client.config.client_name = f"CognitePygen:{__version__}:QueryExecutor:{client.config.client_name}"
        self._view_by_id: dict[dm.ViewId, dm.View] = {view.as_id(): view for view in views or []}
        self._unpack_edges: Literal["skip", "include"] = "include"

    def search(
        self,
        view: dm.ViewId,
        properties: SelectedProperties | None = None,
        query: str | None = None,
        filter: filters.Filter | None = None,
        search_properties: str | SequenceNotStr[str] | None = None,
        sort: Sequence[dm.InstanceSort] | dm.InstanceSort | None = None,
        limit: int | None = None,
    ) -> list[dict[str, Any]]:
        """Search for nodes/edges in a view.


        Args:
            view: The view in which the nodes/edges have properties.
            properties: The properties to include in the result. If None, all properties are included.
            query: The search query.
            filter: The filter to apply ahead of the search.
            search_properties: The properties to search. If None, all text properties are searched.
            sort: The sort order of the results.
            limit: The maximum number of results to return. Max 1000.

        Returns:
            list[dict[str, Any]]: The search results.

        """
        filter = self._equals_none_to_not_exists(filter)
        search_result = self._client.data_modeling.instances.search(
            view,
            query,
            properties=search_properties,  # type: ignore[arg-type]
            filter=filter,
            limit=limit or SEARCH_LIMIT,
            sort=sort,
        )

        flatten_props = self._as_property_list(properties, "list") if properties else None
        are_flat_properties = flatten_props == properties
        if properties is None or are_flat_properties:
            return self._prepare_list_result(search_result, set(flatten_props) if flatten_props else None)

        # Lookup nested properties:

        order_by_node_ids = {node.as_id(): no for no, node in enumerate(search_result)}
        # If we are sorting, then we need to ensure externalId and space are included in the properties.
        # This is because we need them for the final sorting.
        include_space = False
        include_external_id = False
        if sort is not None:
            include_space = "space" not in properties
            include_external_id = "externalId" not in properties
        if include_space:
            properties.append("space")
        if include_external_id:
            properties.append("externalId")

        result: list[dict[str, Any]] = []
        for space, space_nodes in itertools.groupby(
            sorted(order_by_node_ids.keys(), key=lambda x: x.space), key=lambda x: x.space
        ):
            is_space = filters.Equals(["node", "space"], space)
            for chunk in chunker(list(space_nodes), IN_FILTER_CHUNK_SIZE):
                batch_filter = filters.And(
                    filters.In(["node", "externalId"], [node.external_id for node in chunk]), is_space
                )
                batch_result = self.list(view, properties, batch_filter, sort, limit or SEARCH_LIMIT)
                result.extend(batch_result)

        if sort is not None:
            result.sort(key=lambda x: order_by_node_ids[dm.NodeId(x["space"], x["externalId"])])
        if include_space or include_external_id:
            for item in result:
                if include_space:
                    del item["space"]
                if include_external_id:
                    del item["externalId"]

        return result

    @overload
    def aggregate(
        self,
        view: dm.ViewId,
        aggregates: Aggregation | Sequence[Aggregation],
        group_by: None = None,
        filter: filters.Filter | None = None,
        query: str | None = None,
        search_properties: str | SequenceNotStr[str] | None = None,
        limit: int | None = None,
    ) -> dict[str, Any]: ...

    @overload
    def aggregate(
        self,
        view: dm.ViewId,
        aggregates: Aggregation | Sequence[Aggregation],
        group_by: str | SequenceNotStr[str],
        filter: filters.Filter | None = None,
        query: str | None = None,
        search_properties: str | SequenceNotStr[str] | None = None,
        limit: int | None = None,
    ) -> list[dict[str, Any]]: ...

    def aggregate(
        self,
        view: dm.ViewId,
        aggregates: Aggregation | Sequence[Aggregation],
        group_by: str | SequenceNotStr[str] | None = None,
        filter: filters.Filter | None = None,
        query: str | None = None,
        search_properties: str | SequenceNotStr[str] | None = None,
        limit: int | None = None,
    ) -> dict[str, Any] | list[dict[str, Any]]:
        """Aggregate nodes/edges in a view.

        Args:
            view: The view in which the nodes/edges have properties.
            aggregates: The aggregations to perform.
            group_by: The properties to group by.
            filter: The filter to apply ahead of the aggregation.
            query: The search query. This is useful when you want to show the number of results
                of a specific search query. It is useful for combining with the search method.
            search_properties: The properties to search. If None, all text properties are searched.
            limit: The maximum number of results to return. Max 1000.

        Returns:
            dict[str, Any] | list[dict[str, Any]]: The aggregation results.

        """
        filter = self._equals_none_to_not_exists(filter)
        return self._execute_aggregation(view, aggregates, search_properties, query, filter, group_by, limit)

    @classmethod
    def _equals_none_to_not_exists(cls, filter: filters.Filter | None) -> filters.Filter | None:
        """Converts all Equals([property], None) filters to Not(Exists([property])) filters.

        The motivation is that the DMS API does not support Equals([property], None) filters, and
        it is more intuitive to use Equals([property], None) filters in the query builder.
        """
        if isinstance(filter, filters.Equals) and filter._value is None:
            return filters.Not(filters.Exists(filter._property))
        elif isinstance(filter, filters.And):
            return filters.And(*[res for f in filter._filters if (res := cls._equals_none_to_not_exists(f))])
        elif isinstance(filter, filters.Or):
            return filters.Or(*[res for f in filter._filters if (res := cls._equals_none_to_not_exists(f))])
        elif isinstance(filter, filters.Not) and filter._filters:
            if res := cls._equals_none_to_not_exists(filter._filters[0]):
                return filters.Not(res)
        return filter

    def _get_view(self, view_id: dm.ViewId) -> dm.View:
        if view_id not in self._view_by_id:
            view = self._client.data_modeling.views.retrieve(view_id, all_versions=False)
            if not view:
                raise CogniteAPIError(f"View not found: {view_id!r}", code=200)
            self._view_by_id[view_id] = view[0]
        return self._view_by_id[view_id]

    @staticmethod
    def _as_property_list(properties: SelectedProperties, operation: str) -> list[str]:
        output = []
        is_nested_supported = operation == "list"
        for prop in properties:
            if isinstance(prop, str):
                output.append(prop)
            elif isinstance(prop, dict) and is_nested_supported:
                if len(prop) != 1:
                    raise ValueError(f"Unexpected nested property: {prop}")
                key = next(iter(prop.keys()))
                output.append(key)
            elif isinstance(prop, dict):
                raise ValueError(f"Nested properties are not supported for operation {operation}")
            else:
                raise ValueError(f"Unexpected property type: {type(prop)}")
        return output

    def _execute_list(
        self,
        view_id: dm.ViewId,
        properties: SelectedProperties,
        filter: filters.Filter | None = None,
        sort: Sequence[dm.InstanceSort] | dm.InstanceSort | None = None,
        limit: int | None = None,
    ) -> list[dict[str, Any]]:
        view = self._get_view(view_id)
        root_properties = self._as_property_list(properties, "list")
        builder = QueryBuilder()
        factory = QueryStepFactory(builder.create_name, view=view, user_selected_properties=properties)

        if not factory.connection_properties:
            result = self._client.data_modeling.instances.list(
                instance_type="node",
                sources=[view_id],
                filter=filter,
                limit=limit,
                sort=sort,
            )
            return self._prepare_list_result(result, set(root_properties))

        reverse_views = {
            prop.through.source: self._get_view(prop.through.source)
            for prop in factory.reverse_properties.values()
            if isinstance(prop.through.source, dm.ViewId)
        }
        builder.append(factory.root(filter, limit=limit))
        for connection_id, connection in factory.connection_properties.items():
            builder.extend(factory.from_connection(connection_id, connection, reverse_views))
        _ = builder.execute_query(self._client, remove_not_connected=False)
        return QueryUnpacker(
            builder, edges=self._unpack_edges, as_data_record=False, edge_type_key="type", node_type_key="type"
        ).unpack()

    @classmethod
    def _prepare_list_result(
        cls, result: dm.NodeList[dm.Node], selected_properties: set[str] | None
    ) -> list[dict[str, Any]]:
        output: list[dict[str, Any]] = []
        for node in result:
            item = QueryUnpacker.flatten_dump(node, selected_properties)
            if item:
                # As long as you have selected properties, you will not get None.
                output.append(item)  # type: ignore[arg-type]
        return output

    def _execute_aggregation(
        self,
        view_id: dm.ViewId,
        aggregates: Aggregation | Sequence[Aggregation],
        search_properties: str | SequenceNotStr[str] | None = None,
        query: str | None = None,
        filter: filters.Filter | None = None,
        group_by: str | SequenceNotStr[str] | None = None,
        limit: int | None = None,
    ) -> dict[str, Any] | list[dict[str, Any]]:
        aggregates_list = aggregates if isinstance(aggregates, Sequence) else [aggregates]
        metric_aggregates = [agg for agg in aggregates_list if isinstance(agg, dm.aggregations.MetricAggregation)]
        histogram_aggregates = [agg for agg in aggregates_list if isinstance(agg, dm.aggregations.Histogram)]
        if metric_aggregates and histogram_aggregates:
            raise ValueError("Cannot mix metric and histogram aggregations")

        if metric_aggregates and group_by is not None:
            group_by_result = self._client.data_modeling.instances.aggregate(  # type: ignore[call-overload]
                view=view_id,
                group_by=group_by,
                aggregates=metric_aggregates,
                query=query,
                properties=search_properties,
                filter=filter,
                limit=limit or AGGREGATION_LIMIT,
            )
            return self._grouped_metric_aggregation_to_dict(group_by_result)
        elif metric_aggregates:
            metric_results = self._client.data_modeling.instances.aggregate(
                view_id,
                aggregates=metric_aggregates,
                query=query,
                properties=search_properties,
                filter=filter,
                limit=limit or AGGREGATION_LIMIT,
            )
            return self._metric_aggregation_to_dict(metric_results)

        elif histogram_aggregates:
            histogram_results = self._client.data_modeling.instances.histogram(
                view_id,
                histograms=histogram_aggregates,
                query=query,
                properties=search_properties,  # type: ignore[arg-type]
                filter=filter,
                limit=limit or AGGREGATION_LIMIT,
            )
            return self._histogram_aggregation_to_dict(histogram_results)
        else:
            raise ValueError("No aggregation found")

    @staticmethod
    def _metric_aggregation_to_dict(aggregation: list[dm.aggregations.AggregatedNumberedValue]) -> dict[str, Any]:
        values_by_aggregations: dict[str, dict[str, Any]] = defaultdict(dict)
        for item in aggregation:
            values_by_aggregations[item._aggregate][item.property] = item.value
        return dict(values_by_aggregations)

    @classmethod
    def _grouped_metric_aggregation_to_dict(cls, aggregations: InstanceAggregationResultList) -> list[dict[str, Any]]:
        output: list[dict[str, Any]] = []
        for group in aggregations:
            group_dict = {
                "group": group.group,
                **cls._metric_aggregation_to_dict(group.aggregates),
            }
            output.append(group_dict)
        return output

    @staticmethod
    def _histogram_aggregation_to_dict(aggregation: list[dm.aggregations.HistogramValue]) -> dict[str, Any]:
        output: dict[str, dict[str, Any]] = defaultdict(dict)
        for item in aggregation:
            output[item._aggregate][item.property] = {
                "interval": item.interval,
                "buckets": [bucket.dump() for bucket in item.buckets],
            }
        return dict(output)

    def list(
        self,
        view: dm.ViewId,
        properties: SelectedProperties,
        filter: filters.Filter | None = None,
        sort: Sequence[dm.InstanceSort] | dm.InstanceSort | None = None,
        limit: int | None = None,
    ) -> list[dict[str, Any]]:
        """List nodes/edges in a view.

        Args:
            view: The view in which the nodes/edges have properties.
            properties: The properties to include in the result.
            filter: The filter to apply ahead of the list operation.
            sort: The sort order of the results.
            limit: The maximum number of results to return. Pagination is handled automatically.
        """
        filter = self._equals_none_to_not_exists(filter)
        return self._execute_list(view, properties, filter, sort, limit)

_equals_none_to_not_exists(filter) classmethod

Converts all Equals([property], None) filters to Not(Exists([property])) filters.

The motivation is that the DMS API does not support Equals([property], None) filters, and it is more intuitive to use Equals([property], None) filters in the query builder.

Source code in cognite/pygen/_query/interface.py
@classmethod
def _equals_none_to_not_exists(cls, filter: filters.Filter | None) -> filters.Filter | None:
    """Converts all Equals([property], None) filters to Not(Exists([property])) filters.

    The motivation is that the DMS API does not support Equals([property], None) filters, and
    it is more intuitive to use Equals([property], None) filters in the query builder.
    """
    if isinstance(filter, filters.Equals) and filter._value is None:
        return filters.Not(filters.Exists(filter._property))
    elif isinstance(filter, filters.And):
        return filters.And(*[res for f in filter._filters if (res := cls._equals_none_to_not_exists(f))])
    elif isinstance(filter, filters.Or):
        return filters.Or(*[res for f in filter._filters if (res := cls._equals_none_to_not_exists(f))])
    elif isinstance(filter, filters.Not) and filter._filters:
        if res := cls._equals_none_to_not_exists(filter._filters[0]):
            return filters.Not(res)
    return filter
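
A small sketch of the rewrite this helper performs (the property reference is hypothetical):

```python
from cognite.client.data_classes import filters

prop = ["my_space", "Pump/v1", "manufacturer"]  # hypothetical property reference

# The DMS API rejects Equals-with-None, so the query-builder form `before`
# is rewritten into the equivalent filter `after` before being sent:
before = filters.Equals(prop, None)
after = filters.Not(filters.Exists(prop))
# And/Or/Not filters are rewritten recursively in the same way.
```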

aggregate(view, aggregates, group_by=None, filter=None, query=None, search_properties=None, limit=None)

aggregate(view: dm.ViewId, aggregates: Aggregation | Sequence[Aggregation], group_by: None = None, filter: filters.Filter | None = None, query: str | None = None, search_properties: str | SequenceNotStr[str] | None = None, limit: int | None = None) -> dict[str, Any]
aggregate(view: dm.ViewId, aggregates: Aggregation | Sequence[Aggregation], group_by: str | SequenceNotStr[str], filter: filters.Filter | None = None, query: str | None = None, search_properties: str | SequenceNotStr[str] | None = None, limit: int | None = None) -> list[dict[str, Any]]

Aggregate nodes/edges in a view.

Parameters:

| Name              | Type                                  | Description                                                                                                                 | Default  |
|-------------------|---------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|----------|
| view              | ViewId                                | The view in which the nodes/edges have properties.                                                                            | required |
| aggregates        | Aggregation \| Sequence[Aggregation]  | The aggregations to perform.                                                                                                  | required |
| group_by          | str \| SequenceNotStr[str] \| None    | The properties to group by.                                                                                                   | None     |
| filter            | Filter \| None                        | The filter to apply ahead of the aggregation.                                                                                 | None     |
| query             | str \| None                           | The search query. Useful when you want to show the number of results of a specific search query, for example in combination with the search method. | None |
| search_properties | str \| SequenceNotStr[str] \| None    | The properties to search. If None, all text properties are searched.                                                          | None     |
| limit             | int \| None                           | The maximum number of results to return. Max 1000.                                                                            | None     |

Returns:

dict[str, Any] | list[dict[str, Any]]: The aggregation results.

Source code in cognite/pygen/_query/interface.py
def aggregate(
    self,
    view: dm.ViewId,
    aggregates: Aggregation | Sequence[Aggregation],
    group_by: str | SequenceNotStr[str] | None = None,
    filter: filters.Filter | None = None,
    query: str | None = None,
    search_properties: str | SequenceNotStr[str] | None = None,
    limit: int | None = None,
) -> dict[str, Any] | list[dict[str, Any]]:
    """Aggregate nodes/edges in a view.

    Args:
        view: The view in which the nodes/edges have properties.
        aggregates: The aggregations to perform.
        group_by: The properties to group by.
        filter: The filter to apply ahead of the aggregation.
        query: The search query. This is useful when you want to show the number of results
            of a specific search query. It is useful for combining with the search method.
        search_properties: The properties to search. If None, all text properties are searched.
        limit: The maximum number of results to return. Max 1000.

    Returns:
        dict[str, Any] | list[dict[str, Any]]: The aggregation results.

    """
    filter = self._equals_none_to_not_exists(filter)
    return self._execute_aggregation(view, aggregates, search_properties, query, filter, group_by, limit)
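
A usage sketch, assuming an `executor` instance as constructed earlier (view and property names are hypothetical):

```python
from cognite.client import data_modeling as dm
from cognite.client.data_modeling import aggregations as aggs

view_id = dm.ViewId("my_space", "Pump", "v1")

# Without group_by, the result is a single dict, e.g. {"count": {"externalId": 42}}.
total = executor.aggregate(view_id, aggs.Count("externalId"))

# With group_by, the result is a list of dicts, one per group.
per_manufacturer = executor.aggregate(
    view_id, aggs.Count("externalId"), group_by="manufacturer"
)
```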

list(view, properties, filter=None, sort=None, limit=None)

List nodes/edges in a view.

Parameters:

| Name       | Type                                            | Description                                                                    | Default  |
|------------|-------------------------------------------------|----------------------------------------------------------------------------------|----------|
| view       | ViewId                                          | The view in which the nodes/edges have properties.                                | required |
| properties | SelectedProperties                              | The properties to include in the result.                                          | required |
| filter     | Filter \| None                                  | The filter to apply ahead of the list operation.                                  | None     |
| sort       | Sequence[InstanceSort] \| InstanceSort \| None  | The sort order of the results.                                                    | None     |
| limit      | int \| None                                     | The maximum number of results to return. Pagination is handled automatically.     | None     |
Source code in cognite/pygen/_query/interface.py
def list(
    self,
    view: dm.ViewId,
    properties: SelectedProperties,
    filter: filters.Filter | None = None,
    sort: Sequence[dm.InstanceSort] | dm.InstanceSort | None = None,
    limit: int | None = None,
) -> list[dict[str, Any]]:
    """List nodes/edges in a view.

    Args:
        view: The view in which the nodes/edges have properties.
        properties: The properties to include in the result.
        filter: The filter to apply ahead of the list operation.
        sort: The sort order of the results.
        limit: The maximum number of results to return. Pagination is handled automatically.
    """
    filter = self._equals_none_to_not_exists(filter)
    return self._execute_list(view, properties, filter, sort, limit)
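
A usage sketch (view and property names are hypothetical). A nested entry in `properties` selects properties through a connection:

```python
from cognite.client import data_modeling as dm

view_id = dm.ViewId("my_space", "Pump", "v1")

# Flat properties only:
pumps = executor.list(view_id, properties=["name", "serialNumber"], limit=100)

# A nested selection traverses the (hypothetical) manufacturer connection:
pumps_with_maker = executor.list(
    view_id,
    properties=["name", {"manufacturer": ["name"]}],
    limit=100,
)
```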

search(view, properties=None, query=None, filter=None, search_properties=None, sort=None, limit=None)

Search for nodes/edges in a view.

Parameters:

| Name              | Type                                            | Description                                                                     | Default  |
|-------------------|-------------------------------------------------|-----------------------------------------------------------------------------------|----------|
| view              | ViewId                                          | The view in which the nodes/edges have properties.                                 | required |
| properties        | SelectedProperties \| None                      | The properties to include in the result. If None, all properties are included.     | None     |
| query             | str \| None                                     | The search query.                                                                  | None     |
| filter            | Filter \| None                                  | The filter to apply ahead of the search.                                           | None     |
| search_properties | str \| SequenceNotStr[str] \| None              | The properties to search. If None, all text properties are searched.               | None     |
| sort              | Sequence[InstanceSort] \| InstanceSort \| None  | The sort order of the results.                                                     | None     |
| limit             | int \| None                                     | The maximum number of results to return. Max 1000.                                 | None     |

Returns:

list[dict[str, Any]]: The search results.

Source code in cognite/pygen/_query/interface.py
def search(
    self,
    view: dm.ViewId,
    properties: SelectedProperties | None = None,
    query: str | None = None,
    filter: filters.Filter | None = None,
    search_properties: str | SequenceNotStr[str] | None = None,
    sort: Sequence[dm.InstanceSort] | dm.InstanceSort | None = None,
    limit: int | None = None,
) -> list[dict[str, Any]]:
    """Search for nodes/edges in a view.


    Args:
        view: The view in which the nodes/edges have properties.
        properties: The properties to include in the result. If None, all properties are included.
        query: The search query.
        filter: The filter to apply ahead of the search.
        search_properties: The properties to search. If None, all text properties are searched.
        sort: The sort order of the results.
        limit: The maximum number of results to return. Max 1000.

    Returns:
        list[dict[str, Any]]: The search results.

    """
    filter = self._equals_none_to_not_exists(filter)
    search_result = self._client.data_modeling.instances.search(
        view,
        query,
        properties=search_properties,  # type: ignore[arg-type]
        filter=filter,
        limit=limit or SEARCH_LIMIT,
        sort=sort,
    )

    flatten_props = self._as_property_list(properties, "list") if properties else None
    are_flat_properties = flatten_props == properties
    if properties is None or are_flat_properties:
        return self._prepare_list_result(search_result, set(flatten_props) if flatten_props else None)

    # Lookup nested properties:

    order_by_node_ids = {node.as_id(): no for no, node in enumerate(search_result)}
    # If we are sorting, then we need to ensure externalId and space are included in the properties.
    # This is because we need them for the final sorting.
    include_space = False
    include_external_id = False
    if sort is not None:
        include_space = "space" not in properties
        include_external_id = "externalId" not in properties
    if include_space:
        properties.append("space")
    if include_external_id:
        properties.append("externalId")

    result: list[dict[str, Any]] = []
    for space, space_nodes in itertools.groupby(
        sorted(order_by_node_ids.keys(), key=lambda x: x.space), key=lambda x: x.space
    ):
        is_space = filters.Equals(["node", "space"], space)
        for chunk in chunker(list(space_nodes), IN_FILTER_CHUNK_SIZE):
            batch_filter = filters.And(
                filters.In(["node", "externalId"], [node.external_id for node in chunk]), is_space
            )
            batch_result = self.list(view, properties, batch_filter, sort, limit or SEARCH_LIMIT)
            result.extend(batch_result)

    if sort is not None:
        result.sort(key=lambda x: order_by_node_ids[dm.NodeId(x["space"], x["externalId"])])
    if include_space or include_external_id:
        for item in result:
            if include_space:
                del item["space"]
            if include_external_id:
                del item["externalId"]

    return result
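
A usage sketch (the view and query text are hypothetical):

```python
from cognite.client import data_modeling as dm

view_id = dm.ViewId("my_space", "Pump", "v1")

# Free-text search over the view's text properties, returned as plain dicts:
hits = executor.search(view_id, query="booster", limit=10)
for hit in hits:
    print(hit)
```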

build_wheel(model_id, client=None, *, top_level_package=None, client_name=None, default_instance_space=None, output_dir=Path('dist'), format_code=True, config=None)

Generates a wheel containing a Python SDK tailored to the given Data Model(s).

Parameters:

| Name                   | Type                              | Description                                                                                                                 | Default      |
|------------------------|-----------------------------------|-------------------------------------------------------------------------------------------------------------------------------|--------------|
| model_id               | DataModel \| Sequence[DataModel]  | The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s) directly to avoid fetching them from CDF. | required |
| client                 | Optional[CogniteClient]           | The cognite client used for fetching the data model. This is required if you pass in data model ID(s) in the model_id argument and not a data model. | None |
| top_level_package      | Optional[str]                     | The name of the top level package for the SDK. For example, if we have top_level_package=apm and client_name=APMClient, then importing the client will be done with from apm import APMClient. If nothing is passed, the package will be [external_id:snake] of the first data model given, while the client name will be [external_id:pascal_case]. | None |
| client_name            | Optional[str]                     | The name of the client class. For example, APMClient. See above for more details.                                              | None         |
| default_instance_space | str \| None                       | The default instance space to use for the generated SDK. If not provided, the space must be specified when creating, deleting, and retrieving nodes and edges. | None |
| output_dir             | Path                              | The location to output the generated SDK wheel.                                                                                | Path('dist') |
| format_code            | bool                              | Whether to format the generated code using black.                                                                              | True         |
| config                 | Optional[PygenConfig]             | The configuration used to control how to generate the SDK.                                                                     | None         |
Source code in cognite/pygen/_build.py
def build_wheel(
    model_id: DataModel | Sequence[DataModel],
    client: Optional[CogniteClient] = None,
    *,
    top_level_package: Optional[str] = None,
    client_name: Optional[str] = None,
    default_instance_space: str | None = None,
    output_dir: Path = Path("dist"),
    format_code: bool = True,
    config: Optional[PygenConfig] = None,
) -> None:
    """
    Generates a wheel containing a Python SDK tailored to the given Data Model(s).

    Args:
        model_id: The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s)
            directly to avoid fetching them from CDF.
        client: The cognite client used for fetching the data model. This is required if you pass in
            data models ID(s) in the `model_id` argument and not a data model.
        top_level_package: The name of the top level package for the SDK. For example,
            if we have top_level_package=`apm` and client_name=`APMClient`, then
            importing the client will be done with `from apm import APMClient`. If nothing is passed,
            the package will be [external_id:snake] of the first data model given, while
            the client name will be [external_id:pascal_case].
        client_name: The name of the client class. For example, `APMClient`. See above for more details.
        default_instance_space: The default instance space to use for the generated SDK. If not provided,
            the space must be specified when creating, deleting, and retrieving nodes and edges.
        output_dir: The location to output the generated SDK wheel. Defaults to "dist".
        format_code: Whether to format the generated code using black. Defaults to True.
        config: The configuration used to control how to generate the SDK.
    """
    try:
        from build import ProjectBuilder  # type: ignore[import]
    except ImportError:
        raise ImportError(
            "'build' is required to build wheel. Install pygen with `pip install pygen[cli]` or "
            "install build directly with `pip install build`."
        ) from None

    data_model = _get_data_model(model_id, client, print)
    folder_name = _create_folder_name(
        data_model.as_id() if isinstance(data_model, dm.DataModel) else data_model.as_ids()
    )
    build_dir = Path(tempfile.gettempdir()) / "pygen_build" / folder_name
    if build_dir.exists():
        try:
            shutil.rmtree(build_dir)
        except Exception as e:
            print(f"Failed to clean temporary build directory {build_dir}: {e}")

    external_id = _extract_external_id(data_model)
    if top_level_package is None:
        top_level_package = _default_top_level_package(external_id)
    if client_name is None:
        client_name = _default_client_name(external_id)
    generate_sdk(
        data_model,
        client,
        top_level_package=top_level_package,
        client_name=client_name,
        default_instance_space=default_instance_space,
        output_dir=build_dir / _top_level_to_path(top_level_package),
        overwrite=True,
        format_code=format_code,
        config=config,
    )

    generate_pyproject_toml(build_dir, top_level_package)

    output_dir.mkdir(exist_ok=True, parents=True)
    ProjectBuilder(build_dir).build(distribution="wheel", output_directory=str(output_dir))

    print(f"Generated SDK wheel at {output_dir}")

generate_sdk(model_id, client=None, top_level_package=None, client_name=None, default_instance_space=None, output_dir=None, logger=None, overwrite=False, format_code=False, config=None, return_sdk_files=False)

generate_sdk(model_id: DataModel | Sequence[DataModel], client: Optional[CogniteClient] = None, top_level_package: Optional[str] = None, client_name: Optional[str] = None, default_instance_space: str | None = None, output_dir: Optional[Path] = None, logger: Optional[Callable[[str], None]] = None, overwrite: bool = False, format_code: bool = False, config: Optional[PygenConfig] = None, return_sdk_files: Literal[False] = False) -> None
generate_sdk(model_id: DataModel | Sequence[DataModel], client: Optional[CogniteClient] = None, top_level_package: Optional[str] = None, client_name: Optional[str] = None, default_instance_space: str | None = None, output_dir: Optional[Path] = None, logger: Optional[Callable[[str], None]] = None, overwrite: bool = False, format_code: bool = False, config: Optional[PygenConfig] = None, return_sdk_files: Literal[True] = False) -> dict[Path, str]

Generates a Python SDK tailored to the given Data Model(s).

Parameters:

| Name                   | Type                              | Description                                                                                                                 | Default  |
|------------------------|-----------------------------------|-------------------------------------------------------------------------------------------------------------------------------|----------|
| model_id               | DataModel \| Sequence[DataModel]  | The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s) directly to avoid fetching them from CDF. | required |
| client                 | Optional[CogniteClient]           | The cognite client used for fetching the data model. This is required if you pass in data model ID(s) in the model_id argument and not a data model. | None |
| top_level_package      | Optional[str]                     | The name of the top level package for the SDK. For example, if we have top_level_package=apm and client_name=APMClient, then importing the client will be done with from apm import APMClient. If nothing is passed, the package will be [external_id:snake] of the first data model given, while the client name will be [external_id:pascal_case]. In the case where pygen is part of another package, the top level package should be the full package name, for example cognite.apm. | None |
| client_name            | Optional[str]                     | The name of the client class. For example, APMClient. See above for more details.                                              | None     |
| default_instance_space | str \| None                       | The default instance space to use for the generated SDK. If not provided, the space must be specified when creating, deleting, and retrieving nodes and edges. | None |
| output_dir             | Optional[Path]                    | The location to output the generated SDK. Defaults to Path.cwd() / Path(top_level_package.replace(".", "/")).                  | None     |
| logger                 | Optional[Callable[[str], None]]   | A logger function to log progress. Defaults to print.                                                                          | None     |
| overwrite              | bool                              | Whether to overwrite the output directory if it already exists.                                                                | False    |
| format_code            | bool                              | Whether to format the generated code using black.                                                                              | False    |
| config                 | Optional[PygenConfig]             | The configuration used to control how to generate the SDK.                                                                     | None     |
| return_sdk_files       | bool                              | Whether to return the generated SDK files as a dictionary. This is useful for granular control of how to write the SDK to disk. | False    |
Source code in cognite/pygen/_generator.py
def generate_sdk(
    model_id: DataModel | Sequence[DataModel],
    client: Optional[CogniteClient] = None,
    top_level_package: Optional[str] = None,
    client_name: Optional[str] = None,
    default_instance_space: str | None = None,
    output_dir: Optional[Path] = None,
    logger: Optional[Callable[[str], None]] = None,
    overwrite: bool = False,
    format_code: bool = False,
    config: Optional[PygenConfig] = None,
    return_sdk_files: bool = False,
) -> None | dict[Path, str]:
    """
    Generates a Python SDK tailored to the given Data Model(s).

    Args:
        model_id: The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s)
            directly to avoid fetching them from CDF.
        client: The cognite client used for fetching the data model. This is required if you pass in
            data models ID(s) in the `model_id` argument and not a data model.
        top_level_package: The name of the top level package for the SDK. For example,
            if we have top_level_package=`apm` and client_name=`APMClient`, then
            importing the client will be done with `from apm import APMClient`. If nothing is passed,
            the package will be [external_id:snake] of the first data model given, while
            the client name will be [external_id:pascal_case]. In the case where pygen is part of
            another package, the top level package should be the full package name, for example `cognite.apm`.
        client_name: The name of the client class. For example, `APMClient`. See above for more details.
        default_instance_space: The default instance space to use for the generated SDK. If not provided,
            the space must be specified when creating, deleting, and retrieving nodes and edges.
        output_dir: The location to output the generated SDK.
            Defaults: Path.cwd() / Path(top_level_package.replace(".", "/")).
        logger: A logger function to log progress. Defaults to print.
        overwrite: Whether to overwrite the output directory if it already exists. Defaults to False.
        format_code: Whether to format the generated code using black. Defaults to False.
        config: The configuration used to control how to generate the SDK.
        return_sdk_files: Whether to return the generated SDK files as a dictionary. Defaults to False.
            This is useful for granular control of how to write the SDK to disk.
    """
    return _generate_sdk(
        model_id,
        client,
        top_level_package,
        client_name,
        default_instance_space,
        output_dir,
        logger,
        overwrite,
        format_code,
        config,
        return_sdk_files,
    )
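
A usage sketch (model ID, package name, and client name are hypothetical):

```python
from pathlib import Path

from cognite.client import data_modeling as dm

from cognite.pygen import generate_sdk, load_cognite_client_from_toml

client = load_cognite_client_from_toml("config.toml")
generate_sdk(
    dm.DataModelId("my_space", "MyModel", "v1"),
    client,
    top_level_package="my_model_sdk",
    client_name="MyModelClient",
    output_dir=Path.cwd() / "my_model_sdk",
    overwrite=True,
)
```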

generate_sdk_notebook(model_id, client=None, top_level_package=None, client_name=None, default_instance_space=None, config=None, clean_pygen_temp_dir=True)

Generates a Python SDK tailored to the given Data Model(s) and imports it into the current Python session.

This function is a wrapper around generate_sdk. It is intended to be used in a Jupyter notebook. The differences are that:

  • The SDK is generated in a temporary directory and added to sys.path, so that it becomes available for import in the current Python session.
  • The signature is simplified.
  • An instantiated client of the generated SDK is returned.

Parameters:

| Name                   | Type                              | Description                                                                                                                 | Default  |
|------------------------|-----------------------------------|-------------------------------------------------------------------------------------------------------------------------------|----------|
| model_id               | DataModel \| Sequence[DataModel]  | The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s) directly to avoid fetching them from CDF. | required |
| client                 | Optional[CogniteClient]           | The cognite client used for fetching the data model. This is required if you pass in data model ID(s) in the model_id argument and not a data model. | None |
| top_level_package      | Optional[str]                     | The name of the top level package for the SDK. For example, if we have top_level_package=apm and client_name=APMClient, then importing the client will be done with from apm import APMClient. If nothing is passed, the package will be [external_id:snake] of the first data model given, while the client name will be [external_id:pascal_case]. | None |
| client_name            | Optional[str]                     | The name of the client class. For example, APMClient. See above for more details.                                              | None     |
| default_instance_space | str \| None                       | The default instance space to use for the generated SDK. If not provided, the space must be specified when creating, deleting, and retrieving nodes and edges. | None |
| config                 | Optional[PygenConfig]             | The configuration used to control how to generate the SDK.                                                                     | None     |
| clean_pygen_temp_dir   | bool                              | Whether to clean the temporary directory used to store the generated SDK.                                                      | True     |

Returns:

Any: The instantiated generated client class.

Source code in cognite/pygen/_generator.py
def generate_sdk_notebook(
    model_id: DataModel | Sequence[DataModel],
    client: Optional[CogniteClient] = None,
    top_level_package: Optional[str] = None,
    client_name: Optional[str] = None,
    default_instance_space: str | None = None,
    config: Optional[PygenConfig] = None,
    clean_pygen_temp_dir: bool = True,
) -> Any:
    """
    Generates a Python SDK tailored to the given Data Model(s) and imports it into the current Python session.

    This function is a wrapper around generate_sdk. It is intended to be used in a Jupyter notebook.
    The differences are that:

    * The SDK is generated in a temporary directory and added to sys.path, so that it
      becomes available for import in the current Python session.
    * The signature is simplified.
    * An instantiated client of the generated SDK is returned.


    Args:
        model_id: The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s)
            directly to avoid fetching them from CDF.
        client: The cognite client used for fetching the data model. This is required if you pass in
            data models ID(s) in the `model_id` argument and not a data model.
        top_level_package: The name of the top level package for the SDK. For example,
            if we have top_level_package=`apm` and client_name=`APMClient`, then
            importing the client will be done with `from apm import APMClient`. If nothing is passed,
            the package will be [external_id:snake] of the first data model given, while
            the client name will be [external_id:pascal_case].
        client_name: The name of the client class. For example, `APMClient`. See above for more details.
        default_instance_space: The default instance space to use for the generated SDK. If not provided,
            the space must be specified when creating, deleting, and retrieving nodes and edges.
        config: The configuration used to control how to generate the SDK.
        clean_pygen_temp_dir: Whether to clean the temporary directory used to store the generated SDK.
            Defaults to True.

    Returns:
        The instantiated generated client class.
    """
    data_model = _get_data_model(model_id, client, print)
    folder_name = _create_folder_name(
        data_model.as_id() if isinstance(data_model, dm.DataModel) else data_model.as_ids()
    )
    output_dir = Path(tempfile.gettempdir()) / "pygen" / folder_name
    if clean_pygen_temp_dir and output_dir.exists():
        try:
            shutil.rmtree(output_dir)
        except Exception as e:
            print(f"Failed to clean temporary directory {output_dir}: {e}")
        else:
            print(f"Cleaned temporary directory {output_dir}")

    external_id = _extract_external_id(data_model)
    if top_level_package is None:
        top_level_package = _default_top_level_package(external_id)
    if client_name is None:
        client_name = _default_client_name(external_id)
    _generate_sdk(
        data_model,
        client,
        top_level_package=top_level_package,
        client_name=client_name,
        default_instance_space=default_instance_space,
        output_dir=output_dir / _top_level_to_path(top_level_package),
        overwrite=True,
        format_code=False,
        config=config,
        context="notebook",
    )
    if str(output_dir) not in sys.path:
        sys.path.append(str(output_dir))
        print(f"Added {output_dir} to sys.path to enable import")
    else:
        print(f"{output_dir} already in sys.path")
    try:
        module = vars(importlib.import_module(top_level_package))
    except SchemaError as error:
        if is_pyodide() and "recursion_loop" in {e["type"] for e in error.errors() if "type" in e}:
            print("Large SDK detected. Reached recursion limit in Pyodide. Tying again skipping schema validation.")
            os.environ["PYDANTIC_SKIP_VALIDATING_CORE_SCHEMAS"] = "true"
            module = vars(importlib.import_module(top_level_package))
        else:
            raise error

    print(f"Imported {top_level_package}")
    print("You can now use the generated SDK in the current Python session.")
    if isinstance(data_model, dm.DataModel):
        view = data_model.views[0]
    elif isinstance(data_model, Sequence):
        view = data_model[0].views[0]
    else:
        view = None

    if view:
        print(
            "The data classes are available by importing, for example, "
            f"`from {top_level_package}.data_classes import {DataClass.to_base_name(view)}Write`"
        )
    return module[client_name](client)
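
A usage sketch for a notebook cell (the model ID is hypothetical):

```python
from cognite.client import data_modeling as dm

from cognite.pygen import generate_sdk_notebook, load_cognite_client_from_toml

client = load_cognite_client_from_toml("config.toml")
pygen_client = generate_sdk_notebook(
    dm.DataModelId("my_space", "MyModel", "v1"),
    client,
)
# pygen_client is an instance of the generated client class, ready to use.
```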

load_cognite_client_from_toml(toml_file='config.toml', section='cognite')

This is a small helper function to load a CogniteClient from a toml file.

The default name of the config file is "config.toml" and it should look like this:

```toml
[cognite]
project = "<cdf-project>"
tenant_id = "<tenant-id>"
cdf_cluster = "<cdf-cluster>"
client_id = "<client-id>"
client_secret = "<client-secret>"
```

Parameters:

| Name      | Type        | Description                                                                                 | Default       |
|-----------|-------------|-----------------------------------------------------------------------------------------------|---------------|
| toml_file | Path \| str | Path to the toml file.                                                                        | 'config.toml' |
| section   | str \| None | Name of the section in the toml file to use. If None, use the top level of the toml file.     | 'cognite'     |

Returns:

CogniteClient: A CogniteClient with configurations from the toml file.

Source code in cognite/pygen/utils/cdf.py
def load_cognite_client_from_toml(
    toml_file: Path | str = "config.toml", section: str | None = "cognite"
) -> CogniteClient:
    """
    This is a small helper function to load a CogniteClient from a toml file.

    The default name of the config file is "config.toml" and it should look like this:

    ```toml
    [cognite]
    project = "<cdf-project>"
    tenant_id = "<tenant-id>"
    cdf_cluster = "<cdf-cluster>"
    client_id = "<client-id>"
    client_secret = "<client-secret>"
    ```

    Args:
        toml_file: Path to toml file
        section: Name of the section in the toml file to use. If None, use the top level of the toml file.
                 Defaults to "cognite".

    Returns:
        A CogniteClient with configurations from the toml file.
    """
    import toml

    toml_content = toml.load(toml_file)
    if section is not None:
        toml_content = toml_content[section]

    login_flow = toml_content.pop("login_flow", None)
    if login_flow == "interactive":
        return CogniteClient.default_oauth_interactive(**toml_content)
    else:
        return CogniteClient.default_oauth_client_credentials(**toml_content)
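
A usage sketch, assuming a `config.toml` like the one above exists in the working directory:

```python
from cognite.pygen import load_cognite_client_from_toml  # assumed top-level export

client = load_cognite_client_from_toml("config.toml", section="cognite")
print(client.config.project)  # quick sanity check that the config was loaded
```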