Skip to content

Pygen

cognite.pygen

This is the main entry point for the pygen package. It contains the main functions for generating SDKs.

build_wheel(model_id, client=None, *, top_level_package=None, client_name=None, default_instance_space=None, output_dir=Path('dist'), format_code=True, config=None)

Generates a wheel with Python SDK tailored to the given Data Model(s).

Parameters:

Name Type Description Default
model_id DataModel | Sequence[DataModel]

The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s) directly to avoid fetching them from CDF.

required
client Optional[CogniteClient]

The Cognite client used for fetching the data model. This is required if you pass data model ID(s), rather than the data model(s) themselves, in the model_id argument.

None
top_level_package Optional[str]

The name of the top level package for the SDK. For example, if we have top_level_package=apm and client_name=APMClient, then the client is imported with from apm import APMClient. If nothing is passed, the package will be [external_id:snake] of the first data model given, while the client name will be [external_id:pascal_case].

None
client_name Optional[str]

The name of the client class. For example, APMClient. See above for more details.

None
default_instance_space str | None

The default instance space to use for the generated SDK. Defaults to the instance space of the first data model given.

None
output_dir Path

The location to output the generated SDK wheel. Defaults to "dist".

Path('dist')
format_code bool

Whether to format the generated code using black. Defaults to True.

True
config Optional[PygenConfig]

The configuration used to control how to generate the SDK.

None
Source code in cognite/pygen/_build.py
def build_wheel(
    model_id: DataModel | Sequence[DataModel],
    client: Optional[CogniteClient] = None,
    *,
    top_level_package: Optional[str] = None,
    client_name: Optional[str] = None,
    default_instance_space: str | None = None,
    output_dir: Path = Path("dist"),
    format_code: bool = True,
    config: Optional[PygenConfig] = None,
) -> None:
    """
    Generates a wheel with Python SDK tailored to the given Data Model(s).

    The SDK is generated into a scratch directory below the system temporary directory
    (`<tmp>/pygen_build/<folder name>`), `generate_pyproject_toml` is run on that directory,
    and the wheel is then built from it into `output_dir`.

    Args:
        model_id: The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s)
            directly to avoid fetching them from CDF.
        client: The Cognite client used for fetching the data model. This is required if you pass
            data model ID(s), rather than the data model(s) themselves, in the `model_id` argument.
        top_level_package: The name of the top level package for the SDK. For example,
            if we have top_level_package=`apm` and client_name=`APMClient`, then
            the client is imported with `from apm import APMClient`. If nothing is passed,
            the package will be [external_id:snake] of the first data model given, while
            the client name will be [external_id:pascal_case].
        client_name: The name of the client class. For example, `APMClient`. See above for more details.
        default_instance_space: The default instance space to use for the generated SDK. Defaults to the
            instance space of the first data model given.
        output_dir: The location to output the generated SDK wheel. Defaults to "dist".
            The directory (including missing parents) is created if it does not exist.
        format_code: Whether to format the generated code using black. Defaults to True.
        config: The configuration used to control how to generate the SDK.

    Raises:
        ImportError: If the optional `build` package is not installed.
    """
    # 'build' is an optional dependency, so it is imported lazily and a missing
    # installation is turned into an actionable message.
    try:
        from build import ProjectBuilder  # type: ignore[import]
    except ImportError:
        raise ImportError(
            "'build' is required to build wheel. Install pygen with `pip install cognite-pygen[cli]` or "
            "install build directly `pip install build`."
        ) from None

    data_model = _get_data_model(model_id, client, print)
    folder_name = _create_folder_name(
        data_model.as_id() if isinstance(data_model, dm.DataModel) else data_model.as_ids()
    )
    build_dir = Path(tempfile.gettempdir()) / "pygen_build" / folder_name
    if build_dir.exists():
        # Best effort only: a leftover build directory is reported but does not abort
        # the build, since the SDK is generated with overwrite=True below.
        try:
            shutil.rmtree(build_dir)
        except Exception as e:
            print(f"Failed to clean temporary build directory {build_dir}: {e}")

    # Resolve the defaults here (not only inside generate_sdk) because the package
    # name is needed again for generate_pyproject_toml.
    external_id = _extract_external_id(data_model)
    if top_level_package is None:
        top_level_package = _default_top_level_package(external_id)
    if client_name is None:
        client_name = _default_client_name(external_id)
    generate_sdk(
        data_model,
        client,
        top_level_package=top_level_package,
        client_name=client_name,
        default_instance_space=default_instance_space,
        output_dir=build_dir,
        overwrite=True,
        format_code=format_code,
        config=config,
    )

    generate_pyproject_toml(build_dir, top_level_package)

    output_dir.mkdir(exist_ok=True, parents=True)
    ProjectBuilder(build_dir).build(distribution="wheel", output_directory=str(output_dir))

    print(f"Generated SDK wheel at {output_dir}")

generate_sdk(model_id, client=None, top_level_package=None, client_name=None, default_instance_space=None, output_dir=None, logger=None, pydantic_version='infer', overwrite=False, format_code=True, config=None, return_sdk_files=False)

Generates a Python SDK tailored to the given Data Model(s).

Parameters:

Name Type Description Default
model_id DataModel | Sequence[DataModel]

The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s) directly to avoid fetching them from CDF.

required
client Optional[CogniteClient]

The Cognite client used for fetching the data model. This is required if you pass data model ID(s), rather than the data model(s) themselves, in the model_id argument.

None
top_level_package Optional[str]

The name of the top level package for the SDK. For example, if we have top_level_package=apm and client_name=APMClient, then the client is imported with from apm import APMClient. If nothing is passed, the package will be [external_id:snake] of the first data model given, while the client name will be [external_id:pascal_case].

None
client_name Optional[str]

The name of the client class. For example, APMClient. See above for more details.

None
default_instance_space str | None

The default instance space to use for the generated SDK. Defaults to the instance space of the first data model given.

None
output_dir Optional[Path]

The location to output the generated SDK. Defaults to the current working directory.

None
logger Optional[Callable[[str], None]]

A logger function to log progress. Defaults to print.

None
pydantic_version Literal['v1', 'v2', 'infer']

The version of pydantic to use. Defaults to "infer" which will use the environment to detect the installed version of pydantic.

'infer'
overwrite bool

Whether to overwrite the output directory if it already exists. Defaults to False.

False
format_code bool

Whether to format the generated code using black. Defaults to True.

True
config Optional[PygenConfig]

The configuration used to control how to generate the SDK.

None
return_sdk_files bool

Whether to return the generated SDK files as a dictionary. Defaults to False. This is useful for granular control of how to write the SDK to disk.

False
Source code in cognite/pygen/_generator.py
def generate_sdk(
    model_id: DataModel | Sequence[DataModel],
    client: Optional[CogniteClient] = None,
    top_level_package: Optional[str] = None,
    client_name: Optional[str] = None,
    default_instance_space: str | None = None,
    output_dir: Optional[Path] = None,
    logger: Optional[Callable[[str], None]] = None,
    pydantic_version: Literal["v1", "v2", "infer"] = "infer",
    overwrite: bool = False,
    format_code: bool = True,
    config: Optional[PygenConfig] = None,
    return_sdk_files: bool = False,
) -> None | dict[Path, str]:
    """
    Generates a Python SDK tailored to the given Data Model(s).

    The SDK is either written to `output_dir` or, when `return_sdk_files` is set,
    handed back to the caller without touching the disk.

    Args:
        model_id: The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s)
            directly to avoid fetching them from CDF.
        client: The Cognite client used for fetching the data model. This is required if you pass
            data model ID(s), rather than the data model(s) themselves, in the `model_id` argument.
        top_level_package: The name of the top level package for the SDK. For example,
            if we have top_level_package=`apm` and client_name=`APMClient`, then
            the client is imported with `from apm import APMClient`. If nothing is passed,
            the package will be [external_id:snake] of the first data model given, while
            the client name will be [external_id:pascal_case].
        client_name: The name of the client class. For example, `APMClient`. See above for more details.
        default_instance_space: The default instance space to use for the generated SDK. Defaults to the
            instance space of the first data model given.
        output_dir: The location to output the generated SDK. Defaults to the current working directory.
        logger: A logger function to log progress. Defaults to print.
        pydantic_version: The version of pydantic to use. Defaults to "infer" which will use
            the environment to detect the installed version of pydantic.
        overwrite: Whether to overwrite the output directory if it already exists. Defaults to False.
        format_code: Whether to format the generated code using black. Defaults to True.
        config: The configuration used to control how to generate the SDK.
        return_sdk_files: Whether to return the generated SDK files as a dictionary. Defaults to False.
            This is useful for granular control of how to write the SDK to disk.

    Returns:
        The generated SDK files when `return_sdk_files` is True, otherwise None.
    """
    log = logger or print
    resolved_model = _get_data_model(model_id, client, log)

    # Package and client names fall back to names derived from the model's external id.
    model_external_id = _extract_external_id(resolved_model)
    package_name = (
        top_level_package if top_level_package is not None else _default_top_level_package(model_external_id)
    )
    client_class_name = client_name if client_name is not None else _default_client_name(model_external_id)

    generated_files = SDKGenerator(
        package_name,
        client_class_name,
        resolved_model,
        default_instance_space,
        pydantic_version,
        log,
        config or PygenConfig(),
    ).generate_sdk()

    if not return_sdk_files:
        destination = output_dir or Path.cwd()
        log(f"Writing SDK to {destination}")
        write_sdk_to_disk(generated_files, destination, overwrite, log, format_code, package_name)
        log("Done!")
        return None
    return generated_files

generate_sdk_notebook(model_id, client=None, top_level_package=None, client_name=None, default_instance_space=None, config=None, clean_pygen_temp_dir=True)

Generates a Python SDK tailored to the given Data Model(s) and imports it into the current Python session.

This function is a wrapper around generate_sdk. It is intended to be used in a Jupyter notebook. The differences are:

  • The SDK is generated in a temporary directory, which is added to sys.path so that the SDK can be imported in the current Python session.
  • The signature is simplified.
  • An instantiated client of the generated SDK is returned.

Parameters:

Name Type Description Default
model_id DataModel | Sequence[DataModel]

The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s) directly to avoid fetching them from CDF.

required
client Optional[CogniteClient]

The Cognite client used for fetching the data model. This is required if you pass data model ID(s), rather than the data model(s) themselves, in the model_id argument.

None
top_level_package Optional[str]

The name of the top level package for the SDK. For example, if we have top_level_package=apm and client_name=APMClient, then the client is imported with from apm import APMClient. If nothing is passed, the package will be [external_id:snake] of the first data model given, while the client name will be [external_id:pascal_case].

None
client_name Optional[str]

The name of the client class. For example, APMClient. See above for more details.

None
default_instance_space str | None

The default instance space to use for the generated SDK. Defaults to the instance space of the first data model given.

None
config Optional[PygenConfig]

The configuration used to control how to generate the SDK.

None
clean_pygen_temp_dir bool

Whether to clean the temporary directory used to store the generated SDK. Defaults to True.

True

Returns:

Type Description
Any

The instantiated generated client class.

Source code in cognite/pygen/_generator.py
def generate_sdk_notebook(
    model_id: DataModel | Sequence[DataModel],
    client: Optional[CogniteClient] = None,
    top_level_package: Optional[str] = None,
    client_name: Optional[str] = None,
    default_instance_space: str | None = None,
    config: Optional[PygenConfig] = None,
    clean_pygen_temp_dir: bool = True,
) -> Any:
    """
    Generates a Python SDK tailored to the given Data Model(s) and imports it into the current Python session.

    This function is a wrapper around generate_sdk. It is intended to be used in a Jupyter notebook.
    The differences are:

    * The SDK is generated in a temporary directory, which is added to sys.path so that the SDK
      can be imported in the current Python session.
    * The signature is simplified.
    * An instantiated client of the generated SDK is returned.


    Args:
        model_id: The ID(s) of the data model(s) used to create a tailored SDK. You can also pass in the data model(s)
            directly to avoid fetching them from CDF.
        client: The Cognite client used for fetching the data model. This is required if you pass
            data model ID(s), rather than the data model(s) themselves, in the `model_id` argument.
        top_level_package: The name of the top level package for the SDK. For example,
            if we have top_level_package=`apm` and client_name=`APMClient`, then
            the client is imported with `from apm import APMClient`. If nothing is passed,
            the package will be [external_id:snake] of the first data model given, while
            the client name will be [external_id:pascal_case].
        client_name: The name of the client class. For example, `APMClient`. See above for more details.
        default_instance_space: The default instance space to use for the generated SDK. Defaults to the
            instance space of the first data model given.
        config: The configuration used to control how to generate the SDK.
        clean_pygen_temp_dir: Whether to clean the temporary directory used to store the generated SDK.
            Defaults to True.

    Returns:
        The instantiated generated client class.
    """
    data_model = _get_data_model(model_id, client, print)
    folder_name = _create_folder_name(
        data_model.as_id() if isinstance(data_model, dm.DataModel) else data_model.as_ids()
    )
    output_dir = Path(tempfile.gettempdir()) / "pygen" / folder_name
    if clean_pygen_temp_dir and output_dir.exists():
        # Best effort only: a failed clean is reported, and generation continues
        # with overwrite=True.
        try:
            shutil.rmtree(output_dir)
        except Exception as e:
            print(f"Failed to clean temporary directory {output_dir}: {e}")
        else:
            print(f"Cleaned temporary directory {output_dir}")

    external_id = _extract_external_id(data_model)
    if top_level_package is None:
        top_level_package = _default_top_level_package(external_id)
    if client_name is None:
        client_name = _default_client_name(external_id)
    generate_sdk(
        data_model,
        client,
        top_level_package=top_level_package,
        client_name=client_name,
        default_instance_space=default_instance_space,
        output_dir=output_dir,
        overwrite=True,
        format_code=False,
        config=config,
    )
    if str(output_dir) not in sys.path:
        sys.path.append(str(output_dir))
        print(f"Added {output_dir} to sys.path to enable import")
    else:
        print(f"{output_dir} already in sys.path")
    module = vars(importlib.import_module(top_level_package))
    print(f"Imported {top_level_package}")
    print("You can now use the generated SDK in the current Python session.")

    # Pick a view to use in the import example below. The example is purely
    # informational, so a model without views (or an empty model sequence) must
    # skip the hint rather than raise IndexError after the SDK has been generated.
    if isinstance(data_model, dm.DataModel):
        views = data_model.views
    elif isinstance(data_model, Sequence) and data_model:
        views = data_model[0].views
    else:
        views = None
    view = views[0] if views else None

    if view:
        print(
            "The data classes are available by importing, for example, "
            f"`from {top_level_package}.data_classes import {DataClass.to_base_name(view)}Write`"
        )
    return module[client_name](client)

load_cognite_client_from_toml(toml_file='config.toml', section='cognite')

This is a small helper function to load a CogniteClient from a toml file.

The default name of the config file is "config.toml" and it should look like this:

[cognite]
project = "<cdf-project>"
tenant_id = "<tenant-id>"
cdf_cluster = "<cdf-cluster>"
client_id = "<client-id>"
client_secret = "<client-secret>"

Parameters:

Name Type Description Default
toml_file Path | str

Path to toml file

'config.toml'
section str | None

Name of the section in the toml file to use. If None, use the top level of the toml file. Defaults to "cognite".

'cognite'

Returns:

Type Description
CogniteClient

A CogniteClient with configurations from the toml file.

Source code in cognite/pygen/utils/cdf.py
def load_cognite_client_from_toml(
    toml_file: Path | str = "config.toml", section: str | None = "cognite"
) -> CogniteClient:
    """
    Create a CogniteClient from the settings stored in a toml file.

    By default the file is called "config.toml" and is expected to look like this:

    ```toml
    [cognite]
    project = "<cdf-project>"
    tenant_id = "<tenant-id>"
    cdf_cluster = "<cdf-cluster>"
    client_id = "<client-id>"
    client_secret = "<client-secret>"
    ```

    An optional `login_flow` key selects the factory: the value "interactive" uses
    `CogniteClient.default_oauth_interactive`, anything else (including a missing key) uses
    `CogniteClient.default_oauth_client_credentials`. The remaining keys are forwarded
    to the chosen factory as keyword arguments.

    Args:
        toml_file: Path to toml file
        section: Name of the section in the toml file to use. If None, use the top level of the toml file.
                 Defaults to "cognite".

    Returns:
        A CogniteClient with configurations from the toml file.
    """
    import toml

    settings = toml.load(toml_file)
    if section is not None:
        settings = settings[section]

    # 'login_flow' only selects the factory; it is removed so that it is not
    # forwarded as a keyword argument.
    flow = settings.pop("login_flow", None)
    factory = (
        CogniteClient.default_oauth_interactive
        if flow == "interactive"
        else CogniteClient.default_oauth_client_credentials
    )
    return factory(**settings)