Source code for lib.sedna.algorithms.seen_task_learning.task_definition.task_definition_by_origin

# Copyright 2023 The KubeEdge Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Any, Tuple

from sedna.datasources import BaseDataSource
from sedna.common.class_factory import ClassType, ClassFactory

from ..artifact import Task
from .base_task_definition import BaseTaskDefinition


@ClassFactory.register(ClassType.STP)
class TaskDefinitionByOrigin(BaseTaskDefinition):
    """
    Divide a dataset into two tasks based on the origin of each sample.

    Parameters
    ----------
    attribute: str
        Origin names joined by ``", "`` (comma followed by one space).
        The first name labels the task built from the samples that match
        ``city``; the last name labels the task built from all remaining
        samples.
    city: str
        Value looked up in every label ``samples.y[i]`` with the ``in``
        operator to decide which task a sample belongs to.
        NOTE(review): presumably ``samples.y[i]`` is a label path that
        contains the city name -- confirm against the data source.
    """

    def __init__(self, **kwargs):
        super().__init__()
        # "attribute" is mandatory: a missing value fails here on ``.split``.
        self.attribute = kwargs.get("attribute").split(", ")
        self.city = kwargs.get("city")

    def __call__(self,
                 samples: BaseDataSource,
                 **kwargs) -> Tuple[List[Task], Any, BaseDataSource]:
        """
        Split ``samples`` into a matching and a non-matching task.

        Parameters
        ----------
        samples: BaseDataSource
            Data source whose ``x`` and ``y`` are indexed with lists of
            integer positions.
            NOTE(review): this requires array-like containers supporting
            index-list selection (e.g. numpy arrays) -- confirm.

        Returns
        -------
        tasks: List[Task]
            Two tasks: first the samples whose label contains ``city``
            (named after ``attribute[0]``), then every other sample
            (named after ``attribute[-1]``).
        task_index: dict
            Mapping of each attribute name to its position in
            ``attribute``.
        samples: BaseDataSource
            The input data source, returned unchanged.
        """
        d_type = samples.data_type
        task_index = dict(zip(self.attribute, range(len(self.attribute))))

        # Partition the sample positions in one ordered pass.  Building the
        # complement with a set difference would not guarantee that the
        # remaining samples keep their original dataset order.
        matched_idx, other_idx = [], []
        for i in range(samples.num_examples()):
            if self.city in samples.y[i]:
                matched_idx.append(i)
            else:
                other_idx.append(i)

        tasks = []
        partitions = (
            (self.attribute[0], matched_idx),
            (self.attribute[-1], other_idx),
        )
        for attr_name, indices in partitions:
            subset = BaseDataSource(data_type=d_type)
            subset.x, subset.y = samples.x[indices], samples.y[indices]
            tasks.append(
                Task(entry=f"{attr_name}.model",
                     samples=subset,
                     meta_attr=attr_name)
            )

        return tasks, task_index, samples