Skip to content

Main

cognite.pygen.utils

MockGenerator

Mock generator for the pygen package. It can be used to generate mock nodes, edges, timeseries, sequences, and files for a given data model/views.

Parameters:

Name Type Description Default
views List[View]

The views to generate mock data for.

required
instance_space str

The space to use for the generated nodes and edges.

required
view_configs dict[ViewId, ViewMockConfig]

Configuration for how to generate mock data for the different views. The keys are the view ids, and the values are the configuration for the view.

None
default_config ViewMockConfig

Default configuration for how to generate mock data for the different views. Set to 'faker' to use the Python package faker to generate mock data.

None
data_set_id int

The data set id to use for TimeSeries, Sequences, and FileMetadata.

None
seed int

The seed to use for the random number generator. If provided, it is used to reset the seed for each view to ensure reproducible results.

None
skip_interfaces bool

Whether to skip interfaces when generating mock data. Defaults to False.

False
Source code in cognite/pygen/utils/mock_generator.py
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
class MockGenerator:
    """Mock generator for the pygen package. It can be used to generate mock nodes, edges, timeseries,
    sequences, and files for a given data model/views.

    Args:
        views (List[View]): The views to generate mock data for.
        instance_space (str): The space to use for the generated nodes and edges.
        view_configs (dict[ViewId, ViewMockConfig]): Configuration for how to generate mock data for the different
            views. The keys are the view ids, and the values are the configuration for the view.
        default_config (ViewMockConfig): Default configuration for how to generate mock data for the different
            views. Set to 'faker' to use the Python package faker to generate mock data.
        data_set_id (int): The data set id to use for TimeSeries, Sequences, and FileMetadata.
        seed (int): The seed to use for the random number generator. If provided, it is used to reset the seed for
            each view to ensure reproducible results.
        skip_interfaces (bool): Whether to skip interfaces when generating mock data. Defaults to False.
    """

    def __init__(
        self,
        views: typing.Sequence[dm.View],
        instance_space: str,
        view_configs: dict[dm.ViewId, ViewMockConfig] | None = None,
        default_config: ViewMockConfig | Literal["faker"] | None = None,
        data_set_id: int | None = None,
        seed: int | None = None,
        skip_interfaces: bool = False,
    ):
        self._view_by_id = {view.as_id(): view for view in views}
        self._instance_space = instance_space
        self._view_configs = view_configs or {}
        if default_config == "faker":
            self._default_config = _create_faker_config()
        else:
            self._default_config = default_config or ViewMockConfig()
        self._data_set_id = data_set_id
        self._seed = seed
        self._skip_interfaces = skip_interfaces
        self._interfaces: set[dm.ViewId] = set()

    @property
    def _views(self) -> dm.ViewList:
        """All views held by this generator, wrapped in a new ``dm.ViewList`` on every access."""
        known_views = self._view_by_id.values()
        return dm.ViewList(known_views)

    def __str__(self):
        """Return a one-line summary of the generator's configuration.

        The view count and instance space are always listed. The number of custom view
        configurations is listed when there are any, and the data set id and seed are listed
        whenever they were provided (including the value ``0``). ``default_config=True`` means
        the default configuration compares equal to a freshly created ``ViewMockConfig``.
        """
        args = [
            # The number of views equals the number of indexed view ids; no need to build a ViewList.
            f"view_count={len(self._view_by_id)}",
            f"instance_space={self._instance_space}",
        ]
        if self._view_configs:
            args.append(f"custom_config_count={len(self._view_configs)}")
        if self._default_config == ViewMockConfig():
            args.append("default_config=True")
        else:
            args.append("default_config=False")
        # Compare against None rather than relying on truthiness, so that 0 is still reported.
        if self._data_set_id is not None:
            args.append(f"data_set_id={self._data_set_id}")
        if self._seed is not None:
            args.append(f"seed={self._seed}")

        return f"MockGenerator({', '.join(args)})"

    def _repr_html_(self) -> str:
        return str(self)

    @classmethod
    def from_data_model(
        cls,
        data_model_id: DataModelIdentifier,
        instance_space: str,
        client: CogniteClient,
        data_set_id: int | None = None,
        seed: int | None = None,
    ) -> MockGenerator:
        """Build a MockGenerator from the views of a data model retrieved through the client.

        The data model is retrieved with its views inlined, and ``latest_version()`` of the
        retrieved result supplies the views. The generator is created without custom view
        configurations, i.e. with the default mock configuration.

        Args:
            data_model_id: Identifier of the data model to generate mock data for.
            instance_space: The space to use for the generated nodes and edges.
            client: An instance of the CogniteClient class.
            data_set_id: The data set id to use for TimeSeries, Sequences, and FileMetadata.
            seed: The seed to use for the random number generator.

        Returns:
            MockGenerator: The mock generator.

        """
        # The context manager hands back the client object to use for the retrieval.
        with _log_pygen_mock_call(client) as client:
            retrieved = client.data_modeling.data_models.retrieve(
                ids=data_model_id,
                inline_views=True,
            )
            data_model = retrieved.latest_version()

        generator_kwargs = {
            "views": data_model.views,
            "instance_space": instance_space,
            "data_set_id": data_set_id,
            "seed": seed,
        }
        return cls(**generator_kwargs)

    def generate_mock_data(
        self, node_count: int = 5, max_edge_per_type: int = 5, null_values: float = 0.25
    ) -> MockData:
        """Generate mock data for all views held by this generator.

        Every view id listed in some view's ``implements`` is first recorded as an interface.
        Mock data is then generated for each group returned by ``_connected_views`` and
        collected into a single result.

        Args:
            node_count: The number of nodes to generate for each view.
            max_edge_per_type: The maximum number of edges to generate for each edge type.
            null_values: The probability of generating a null value for a nullable property.

        Returns:
            MockData: The generated mock data.
        """
        interfaces: set[dm.ViewId] = set()
        for view in self._views:
            interfaces.update(view.implements or [])
        self._interfaces = interfaces

        collected = MockData()
        for view_group in _connected_views(self._views):
            collected.extend(self._generate_views_mock_data(view_group, node_count, max_edge_per_type, null_values))
        return collected

    def _generate_views_mock_data(
        self, views: list[dm.View], node_count: int, max_edge_per_type: int, null_values: float
    ) -> MockData:
        """Generate nodes for the given views, then connect them, and wrap the result in MockData."""
        data_by_view = self._generate_mock_nodes(views, node_count, null_values)
        # The connection step returns nothing; it works on ``data_by_view`` in place.
        self._generate_mock_connections(views, data_by_view, max_edge_per_type, null_values)
        per_view_data = data_by_view.values()
        return MockData(per_view_data)

    def _generate_mock_nodes(
        self, views: list[dm.View], default_node_count: int, default_nullable_fraction: float
    ) -> dict[dm.ViewId, ViewMockData]:
        output: dict[dm.ViewId, ViewMockData] = {}
        for view in sorted(views, key=lambda v: v.as_id().as_tuple()):
            if self._skip_interfaces and view.as_id() in self._interfaces:
                continue
            if view.used_for == "edge":
                continue

            node_type = _find_first_node_type(view.filter)
            view_id = view.as_id()
            if self._seed:
                self._reset_seed(view_id)

            mapped_properties = self._get_mapped_properties(view)
            config = self._view_configs.get(view_id, self._default_config)
            properties, cdf_ref_external = self._generate_mock_values(
                mapped_properties,
                config,
                view.as_id(),
                config.node_count or default_node_count,
                config.null_values or default_nullable_fraction,
            )
            node_ids = config.node_id_generator(view_id, config.node_count or default_node_count)

            nodes = [
                dm.NodeApply(
                    space=self._instance_space,
                    external_id=node_id,
                    type=node_type,
                    sources=(
                        [
                            dm.NodeOrEdgeData(
                                source=view.as_id(),
                                properties=dict(zip(properties.keys(), props, strict=False)),
                            )
                        ]
                        if props
                        else None
                    ),
                )
                for node_id, *props in zip(node_ids, *properties.values(), strict=False)
            ]
            output[view.as_id()] = ViewMockData(
                view.as_id(),
                instance_space=self._instance_space,
                is_writeable=view.writable,
                node=dm.NodeApplyList(nodes),
                timeseries=TimeSeriesList(cdf_ref_external.timeseries),
                sequence=SequenceList(cdf_ref_external.sequence),
                file=FileMetadataList(cdf_ref_external.file),
            )
        return output

    @staticmethod
    def _get_mapped_properties(view: dm.View) -> dict[str, dm.MappedProperty]:
        return {
            name: prop
            for name, prop in view.properties.items()
            if isinstance(prop, dm.MappedProperty) and not isinstance(prop.type, dm.DirectRelation)
        }

    def _generate_mock_connections(
        self,
        views: list[dm.View],
        outputs: dict[dm.ViewId, ViewMockData],
        default_max_edge_count: int,
        default_nullable_fraction: float,
    ) -> None:
        leaf_children_by_parent = self._to_leaf_children_by_parent(views)
        for view in sorted(views, key=lambda v: v.as_id().as_tuple()):
            if self._skip_interfaces and view.as_id() in self._interfaces:
                continue
            connection_properties = {
                name: prop
                for name, prop in view.properties.items()
                if (isinstance(prop, dm.MappedProperty) and isinstance(prop.type, dm.DirectRelation))
                or isinstance(prop, dm.ConnectionDefinition)
            }
            if not connection_properties:
                continue
            view_id = view.as_id()

            if self._seed:
                self._reset_seed(view_id)

            config = self._view_configs.get(view_id, self._default_config)
            for this_node in outputs[view_id].node:
                for property_name, connection in connection_properties.items():
                    if (
                        isinstance(connection, MultiEdgeConnection | dm.MappedProperty)
                        and connection.source is not None
                        and connection.source not in outputs
                        and connection.source not in leaf_children_by_parent
                    ):
                        warnings.warn(
                            f"{view_id} property {property_name!r} points to a view {connection.source} "
                            f"which is not in the data model. Skipping connection generation.",
                            stacklevel=2,
                        )
                        continue

                    if isinstance(connection, EdgeConnection):
                        other_nodes = self._get_other_nodes(connection.source, outputs, leaf_children_by_parent)
                        if isinstance(connection, SingleEdgeConnection):
                            max_edge_count = 1
                        else:  # MultiEdgeConnection
                            max_edge_count = config.max_edge_per_type or default_max_edge_count
                        max_edge_count = min(max_edge_count, len(other_nodes))
                        edges = self._create_edges(
                            connection, this_node.as_id(), other_nodes, max_edge_count, default_nullable_fraction
                        )
                        outputs[view_id].edge.extend(edges)
                    elif isinstance(connection, dm.MappedProperty) and isinstance(connection.type, dm.DirectRelation):
                        if not connection.source:
                            warnings.warn(
                                f"View {view_id}: DirectRelation {property_name} is missing source, "
                                "do not know the target view the direct relation points to",
                                stacklevel=2,
                            )
                            continue
                        other_nodes = self._get_other_nodes(connection.source, outputs, leaf_children_by_parent)

                        # If the connection is nullable, we randomly decide if we should create the relation
                        create_relation = not connection.nullable or random.random() < (
                            1 - (config.null_values or default_nullable_fraction)
                        )
                        if not (create_relation and other_nodes):
                            continue
                        if connection.type.is_list:
                            max_edge_count = config.max_edge_per_type or default_max_edge_count
                        else:
                            max_edge_count = 1
                        other_nodes = random.sample(other_nodes, k=randint(1, max_edge_count))
                        values = [
                            {"space": other_node.space, "externalId": other_node.external_id}
                            for other_node in other_nodes
                        ]
                        value: dict | list[dict] = values if connection.type.is_list else values[0]
                        self._set_direct_relation_property(this_node, view_id, property_name, value)
                    elif isinstance(connection, ReverseDirectRelation):
                        continue
                    else:
                        warnings.warn(
                            f"View {view_id}: Connection {type(connection)} used by {property_name} "
                            f"is not supported by the {type(self).__name__}.",
                            stacklevel=2,
                        )

    def _generate_mock_values(
        self,
        properties: dict[str, dm.MappedProperty],
        config: ViewMockConfig,
        view_id: dm.ViewId,
        count: int,
        nullable_fraction: float,
    ) -> tuple[dict[str, typing.Sequence[ListAbleDataType]], ViewMockData]:
        """Generate ``count`` mock values for each of the given properties.

        Properties for which ``is_readonly_property`` is true are skipped. The value generator is
        picked in this order: one registered for the property name in ``config.properties``, one
        registered for the property's type in ``config.property_types``, an enum generator for
        enum types, and finally a fallback producing only ``None`` (a warning is emitted then).
        For nullable properties ``int(count * nullable_fraction)`` of the values are ``None``,
        and the values are shuffled whenever nulls were added.

        Args:
            properties: The properties to generate values for, keyed by property name.
            config: The mock configuration supplying the value generators.
            view_id: Id of the view the properties belong to.
            count: Number of values to generate per property.
            nullable_fraction: Fraction of the values set to ``None`` for nullable properties.

        Returns:
            The generated values keyed by property name, and a ViewMockData holding the
            TimeSeries, FileMetadata, and Sequence objects created for the generated references.
        """

        def _nulls_only(count: int) -> list[None]:
            return [None] * count

        def _flatten_references(items: typing.Sequence[ListAbleDataType]) -> list[str]:
            # Each item is a single reference or a list of references; falsy entries are dropped.
            flat: list[str] = []
            for item in items:
                candidates = cast(list[str], item) if isinstance(item, list) else [cast(str, item)]
                flat.extend(ref for ref in candidates if ref)
            return flat

        source_tag = f"Pygen{type(self).__name__}"
        external = ViewMockData(view_id, self._instance_space)
        generated: dict[str, typing.Sequence[ListAbleDataType]] = {}
        values: typing.Sequence[ListAbleDataType]
        for name, prop in properties.items():
            if is_readonly_property(prop.container, prop.container_property_identifier):
                continue

            prop_type = prop.type
            if name in config.properties:
                generator = config.properties[name]
            elif type(prop_type) in config.property_types:
                generator = config.property_types[type(prop_type)]
            elif isinstance(prop_type, Enum):
                generator = _create_enum_generator(prop_type)
            else:
                warnings.warn(
                    f"Could not generate mock data for property {name} of type {type(prop_type)}", stacklevel=2
                )
                generator = _nulls_only

            null_count = int(prop.nullable and count * nullable_fraction)
            value_count = count - null_count
            if isinstance(prop_type, ListablePropertyType) and prop_type.is_list:
                # List properties get one list per node, each with between 0 and 5 elements.
                values = [generator(random.randint(0, 5)) for _ in range(value_count)] + [None] * null_count
            else:
                values = generator(value_count) + [None] * null_count

            # Spread the nulls over the nodes instead of leaving them all at the end.
            if null_count and isinstance(values, list):
                random.shuffle(values)

            generated[name] = values

            # Reference properties additionally get a matching CDF resource per generated id.
            if isinstance(prop_type, dm.TimeSeriesReference):
                external.timeseries.extend(
                    [
                        TimeSeries(
                            external_id=ts,
                            name=ts,
                            data_set_id=self._data_set_id,
                            is_step=False,
                            is_string=False,
                            metadata={"source": source_tag},
                        )
                        for ts in _flatten_references(values)
                    ]
                )
            elif isinstance(prop_type, dm.FileReference):
                external.file.extend(
                    [
                        FileMetadata(
                            external_id=file,
                            name=file,
                            source=self._instance_space,
                            data_set_id=self._data_set_id,
                            mime_type="text/plain",
                            metadata={"source": source_tag},
                        )
                        for file in _flatten_references(values)
                    ]
                )
            elif isinstance(prop_type, dm.SequenceReference):
                external.sequence.extend(
                    [
                        Sequence(
                            external_id=seq,
                            name=seq,
                            data_set_id=self._data_set_id,
                            columns=[
                                SequenceColumn(
                                    external_id="value",
                                    value_type=cast(Literal["Double"], "DOUBLE"),
                                    metadata={"source": source_tag},
                                )
                            ],
                            metadata={"source": source_tag},
                        )
                        for seq in _flatten_references(values)
                    ]
                )

        return generated, external

    @staticmethod
    def _get_other_nodes(
        connection: dm.ViewId,
        outputs: dict[dm.ViewId, ViewMockData],
        leaf_children_by_parent: dict[dm.ViewId, list[dm.ViewId]],
    ) -> list[dm.NodeId]:
        if connection in leaf_children_by_parent:
            sources: list[dm.NodeId] = []
            for child in leaf_children_by_parent[connection]:
                sources.extend(outputs[child].node.as_ids())
        else:
            sources = outputs[connection].node.as_ids()
        return sources

    def _create_edges(
        self,
        connection: EdgeConnection,
        this_node: dm.NodeId,
        sources: list[dm.NodeId],
        max_edge_count: int,
        default_nullable_fraction: float,
    ) -> list[dm.EdgeApply]:
        end_nodes = random.sample(sources, k=randint(0, max_edge_count))

        edges: list[dm.EdgeApply] = []
        for end_node in end_nodes:
            start_node = this_node
            if connection.direction == "inwards":
                start_node, end_node = end_node, start_node

            edge = dm.EdgeApply(
                space=self._instance_space,
                external_id=f"{start_node.external_id}:{end_node.external_id}",
                type=connection.type,
                start_node=(start_node.space, start_node.external_id),
                end_node=(end_node.space, end_node.external_id),
            )
            edges.append(edge)

        if connection.edge_source is None or connection.edge_source not in self._view_by_id:
            return edges
        edge_view = self._view_by_id[connection.edge_source]
        view_id = edge_view.as_id()
        if self._seed:
            self._reset_seed(view_id)

        mapped_properties = self._get_mapped_properties(edge_view)
        config = self._view_configs.get(view_id, self._default_config)
        properties, _ = self._generate_mock_values(
            mapped_properties,
            config,
            view_id,
            len(edges),
            config.null_values or default_nullable_fraction,
        )

        for edge, props in zip(edges, zip(*properties.values(), strict=False), strict=False):
            edge.sources.append(
                dm.NodeOrEdgeData(
                    source=view_id,
                    properties=dict(zip(properties.keys(), props, strict=False)),
                )
            )
        return edges

    def _reset_seed(self, view_id: dm.ViewId) -> None:
        """Resets the seed for the random number generator for the given view.

        The goal is to have reproducible results for each view in a data model.
        """
        if self._seed is None:
            return  # No seed set, nothing to reset
        user_seed: int = self._seed
        view_str = json.dumps(view_id.dump(camel_case=True, include_type=False), sort_keys=True)
        view_seed = user_seed + int(hashlib.md5(view_str.encode("utf-8"), usedforsecurity=False).hexdigest(), 16)
        random.seed(view_seed)
        configs = [self._default_config]
        if view_id in self._view_configs:
            configs.append(self._view_configs[view_id])

        for config in configs:
            if config.reset_seed is not None:
                config.reset_seed(view_seed)
            for generator in config.property_types.values():
                if hasattr(generator, "reset") and isinstance(generator.reset, Callable):  # type: ignore[arg-type]
                    # This is for generators that have a state.
                    generator.reset()

    @staticmethod
    def _set_direct_relation_property(
        this_node: dm.NodeApply, view_id: dm.ViewId, property_name: str, value: dict | list[dict]
    ) -> None:
        if this_node.sources is None:
            this_node.sources = []
        for source in this_node.sources:
            if source.source == view_id:
                if not isinstance(source.properties, dict):
                    source.properties = dict(source.properties) if source.properties else {}
                source.properties[property_name] = value
                break
        else:
            # This is the first property residing in this view
            # for this node
            this_node.sources.append(
                dm.NodeOrEdgeData(
                    source=view_id,
                    properties={property_name: value},
                )
            )

    @staticmethod
    def _to_leaf_children_by_parent(views: list[dm.View]) -> dict[dm.ViewId, list[dm.ViewId]]:
        leaf_children_by_parent: dict[dm.ViewId, set[dm.ViewId]] = defaultdict(set)
        for view in views:
            for parent in view.implements or []:
                leaf_children_by_parent[parent].add(view.as_id())

        leafs: set[dm.ViewId] = set()
        for view_id in TopologicalSorter(leaf_children_by_parent).static_order():
            if view_id not in leaf_children_by_parent:
                leafs.add(view_id)
                continue

            parents = leaf_children_by_parent[view_id] - leafs
            for parent in parents:
                leaf_children_by_parent[view_id].remove(parent)
                leaf_children_by_parent[view_id].update(leaf_children_by_parent[parent])

        return {k: sorted(v, key=lambda x: x.as_tuple()) for k, v in leaf_children_by_parent.items()}

from_data_model(data_model_id, instance_space, client, data_set_id=None, seed=None) classmethod

Creates a MockGenerator from a data model.

Parameters:

Name Type Description Default
data_model_id DataModelIdentifier

Identifier of the data model to generate mock data for.

required
instance_space str

The space to use for the generated nodes and edges.

required
client CogniteClient

An instance of the CogniteClient class.

required
data_set_id int | None

The data set id to use for TimeSeries, Sequences, and FileMetadata.

None
seed int | None

The seed to use for the random number generator.

None
default_config
required

Returns:

Name Type Description
MockGenerator MockGenerator

The mock generator.

Source code in cognite/pygen/utils/mock_generator.py
@classmethod
def from_data_model(
    cls,
    data_model_id: DataModelIdentifier,
    instance_space: str,
    client: CogniteClient,
    data_set_id: int | None = None,
    seed: int | None = None,
) -> MockGenerator:
    """Build a MockGenerator from the views of a data model retrieved through the client.

    The data model is retrieved with its views inlined, and ``latest_version()`` of the
    retrieved result supplies the views. The generator is created without custom view
    configurations, i.e. with the default mock configuration.

    Args:
        data_model_id: Identifier of the data model to generate mock data for.
        instance_space: The space to use for the generated nodes and edges.
        client: An instance of the CogniteClient class.
        data_set_id: The data set id to use for TimeSeries, Sequences, and FileMetadata.
        seed: The seed to use for the random number generator.

    Returns:
        MockGenerator: The mock generator.

    """
    # The context manager hands back the client object to use for the retrieval.
    with _log_pygen_mock_call(client) as client:
        retrieved = client.data_modeling.data_models.retrieve(
            ids=data_model_id,
            inline_views=True,
        )
        data_model = retrieved.latest_version()

    generator_kwargs = {
        "views": data_model.views,
        "instance_space": instance_space,
        "data_set_id": data_set_id,
        "seed": seed,
    }
    return cls(**generator_kwargs)

generate_mock_data(node_count=5, max_edge_per_type=5, null_values=0.25)

Generates mock data for the given data model/views.

Parameters:

Name Type Description Default
node_count int

The number of nodes to generate for each view.

5
max_edge_per_type int

The maximum number of edges to generate for each edge type.

5
null_values float

The probability of generating a null value for a nullable property.

0.25

Returns:

Name Type Description
MockData MockData

The generated mock data.

Source code in cognite/pygen/utils/mock_generator.py
def generate_mock_data(
    self, node_count: int = 5, max_edge_per_type: int = 5, null_values: float = 0.25
) -> MockData:
    """Generate mock data for all views held by this generator.

    Every view id listed in some view's ``implements`` is first recorded as an interface.
    Mock data is then generated for each group returned by ``_connected_views`` and
    collected into a single result.

    Args:
        node_count: The number of nodes to generate for each view.
        max_edge_per_type: The maximum number of edges to generate for each edge type.
        null_values: The probability of generating a null value for a nullable property.

    Returns:
        MockData: The generated mock data.
    """
    interfaces: set[dm.ViewId] = set()
    for view in self._views:
        interfaces.update(view.implements or [])
    self._interfaces = interfaces

    collected = MockData()
    for view_group in _connected_views(self._views):
        collected.extend(self._generate_views_mock_data(view_group, node_count, max_edge_per_type, null_values))
    return collected

clean_model(client, model_id, remove_space=False)

Deletes the data model, the views and all the containers referenced by the views.

Parameters:

Name Type Description Default
client CogniteClient

Connected CogniteClient

required
model_id DataModelIdentifier

ID of the data model to delete.

required
remove_space bool

If True, the space will be deleted as well. Defaults to False.

False
Source code in cognite/pygen/utils/cdf.py
def clean_model(client: CogniteClient, model_id: DataModelIdentifier, remove_space: bool = False) -> None:
    """
    Delete a data model together with its non-global views and the containers those views map to.

    Containers are deleted first, then the views (with up to three attempts), and finally the
    data model itself. Progress is reported with ``print``.

    Args:
        client: Connected CogniteClient
        model_id: ID of the data model to delete.
        remove_space: If True, the space will be deleted as well. Defaults to False.

    """
    data_modeling = client.data_modeling
    model = data_modeling.data_models.retrieve(model_id, inline_views=True).latest_version()
    views = ViewList([view for view in model.views if not view.is_global])

    # Collect the distinct containers backing the mapped properties of the views.
    container_ids = set()
    for view in views:
        for prop in (view.properties or {}).values():
            if isinstance(prop, MappedProperty):
                container_ids.add(prop.container)
    containers = list(container_ids)

    if containers:
        deleted_containers = data_modeling.containers.delete(containers)
        print(f"Deleted {len(deleted_containers)} containers")

    if views:
        max_attempts = 3
        for _attempt in range(max_attempts):
            deleted_views = data_modeling.views.delete(views.as_ids())
            print(f"Deleted {len(deleted_views)} views")

            still_present = data_modeling.views.retrieve(ids=views.as_ids())
            if not still_present:
                break
            # Views are not always successfully deleted on the first try, so we have a retry logic.
            sleep(1)

    deleted_model = data_modeling.data_models.delete(model_id)
    print(f"Deleted {len(deleted_model)} data models")

    if remove_space:
        clean_space(client, model.space)

clean_model_interactive(client, remove_space=False)

Interactive version of clean_model.

This will list all available spaces, and let the user select which one to delete from, and then list all available models in that space, and let the user select which one to delete.

Parameters:

Name Type Description Default
client CogniteClient

Connected CogniteClient

required
remove_space bool

If True, the space will be deleted as well. Defaults to False.

False
Source code in cognite/pygen/utils/cdf.py
def clean_model_interactive(client: CogniteClient, remove_space: bool = False) -> None:
    """
    Prompt-driven wrapper around clean_model.

    The user first picks one of the available spaces, then one of the data models
    found in that space. The picked model is handed to clean_model. If there are
    no spaces, or the picked space holds no models, a message is printed and
    nothing is deleted.

    Args:
        client: Connected CogniteClient
        remove_space: If True, the space will be deleted as well. Defaults to False.

    """
    data_modeling = client.data_modeling

    available_spaces = data_modeling.spaces.list(limit=-1)
    if not available_spaces:
        print("No spaces found")
        return
    # _user_options returns the position of the user's choice within the presented options.
    chosen_space = available_spaces[_user_options(available_spaces.as_ids())]

    available_models = data_modeling.data_models.list(space=chosen_space.space, limit=-1)
    if not available_models:
        print("No models found")
        return
    model_choices = [candidate.as_id() for candidate in available_models]
    chosen_model = available_models[_user_options(model_choices)]

    clean_model(client, chosen_model.as_id(), remove_space)

clean_space(client, space)

Deletes all data in a space.

This means all nodes, edges, views, containers, and data models located in the space are deleted, and finally the space itself is deleted.

Parameters:

Name Type Description Default
client CogniteClient

Connected CogniteClient

required
space str

The space to delete.

required
Source code in cognite/pygen/utils/cdf.py
def clean_space(client: CogniteClient, space: str) -> None:
    """Empties a space and then deletes the space itself.

    Resources are removed in this order: edges, nodes, views, containers and
    data models located in the space, followed by the space. A summary line is
    printed for every resource type that had something to delete.

    Args:
        client: Connected CogniteClient
        space: The space to delete.

    """
    data_modeling = client.data_modeling

    # Instances: edges are removed before nodes.
    if edges := data_modeling.instances.list("edge", limit=-1, filter=filters.Equals(["edge", "space"], space)):
        removed = data_modeling.instances.delete(edges=edges.as_ids())
        print(f"Deleted {len(removed.edges)} edges")
    if nodes := data_modeling.instances.list("node", limit=-1, filter=filters.Equals(["node", "space"], space)):
        removed = data_modeling.instances.delete(nodes=nodes.as_ids())
        print(f"Deleted {len(removed.nodes)} nodes")

    # Schema: views, then containers, then data models.
    if views := data_modeling.views.list(limit=-1, space=space):
        removed_views = data_modeling.views.delete(views.as_ids())
        print(f"Deleted {len(removed_views)} views")
    if containers := data_modeling.containers.list(limit=-1, space=space):
        removed_containers = data_modeling.containers.delete(containers.as_ids())
        print(f"Deleted {len(removed_containers)} containers")
    if data_models := data_modeling.data_models.list(limit=-1, space=space):
        removed_models = data_modeling.data_models.delete(data_models.as_ids())
        print(f"Deleted {len(removed_models)} data models")

    # Finally the space itself.
    removed_space = data_modeling.spaces.delete(space)
    print(f"Deleted space {removed_space}")

load_cognite_client_from_toml(toml_file='config.toml', section='cognite')

This is a small helper function to load a CogniteClient from a toml file.

The default name of the config file is "config.toml" and it should look like this:

[cognite]
project = "<cdf-project>"
tenant_id = "<tenant-id>"
cdf_cluster = "<cdf-cluster>"
client_id = "<client-id>"
client_secret = "<client-secret>"

Parameters:

Name Type Description Default
toml_file Path | str

Path to toml file

'config.toml'
section str | None

Name of the section in the toml file to use. If None, use the top level of the toml file. Defaults to "cognite".

'cognite'

Returns:

Type Description
CogniteClient

A CogniteClient with configurations from the toml file.

Source code in cognite/pygen/utils/cdf.py
def load_cognite_client_from_toml(
    toml_file: Path | str = "config.toml", section: str | None = "cognite"
) -> CogniteClient:
    """
    Build a CogniteClient from the settings stored in a toml file.

    By default the file is named "config.toml" and is expected to look like this:

    ```toml
    [cognite]
    project = "<cdf-project>"
    tenant_id = "<tenant-id>"
    cdf_cluster = "<cdf-cluster>"
    client_id = "<client-id>"
    client_secret = "<client-secret>"
    ```

    An optional ``login_flow`` key selects the authentication flow: the value
    "interactive" uses ``CogniteClient.default_oauth_interactive``; any other value,
    or a missing key, uses ``CogniteClient.default_oauth_client_credentials``. The
    remaining keys are forwarded to the selected constructor as keyword arguments.

    Args:
        toml_file: Path to toml file
        section: Name of the section in the toml file to use. If None, use the top level of the toml file.
                 Defaults to "cognite".

    Returns:
        A CogniteClient with configurations from the toml file.
    """
    import toml

    parsed = toml.load(toml_file)
    settings = parsed if section is None else parsed[section]

    # "login_flow" only selects the constructor, so it is removed before the
    # remaining settings are forwarded as keyword arguments.
    use_interactive = settings.pop("login_flow", None) == "interactive"
    if use_interactive:
        return CogniteClient.default_oauth_interactive(**settings)
    return CogniteClient.default_oauth_client_credentials(**settings)