timeseries_shaper.cycles.cycles_extractor

  1from typing import Optional
  2import pandas as pd
  3import uuid
  4import logging
  5from ..base import Base
  6
  7class CycleExtractor(Base):
  8    """Class for processing cycles based on different criteria."""
  9
 10    def __init__(self, dataframe: pd.DataFrame, start_uuid: str, end_uuid: Optional[str] = None):
 11        """Initializes the class with the data and the UUIDs for cycle start and end."""
 12        super().__init__(dataframe)
 13
 14        # Validate input types
 15        if not isinstance(dataframe, pd.DataFrame):
 16            raise ValueError("dataframe must be a pandas DataFrame")
 17        if not isinstance(start_uuid, str):
 18            raise ValueError("start_uuid must be a string")
 19        
 20        self.df = dataframe  # Use the provided DataFrame directly
 21        self.start_uuid = start_uuid
 22        self.end_uuid = end_uuid if end_uuid else start_uuid
 23        logging.info(f"CycleExtractor initialized with start_uuid: {self.start_uuid} and end_uuid: {self.end_uuid}")
 24
 25    def process_persistent_cycle(self) -> pd.DataFrame:
 26        """Processes cycles where the value of the variable stays true during the cycle."""
 27        # Assuming dataframe is pre-filtered
 28        cycle_starts = self.df[self.df['value_bool'] == True]
 29        cycle_ends = self.df[self.df['value_bool'] == False]
 30
 31        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)
 32
 33    def process_trigger_cycle(self) -> pd.DataFrame:
 34        """Processes cycles where the value of the variable goes from true to false during the cycle."""
 35        # Assuming dataframe is pre-filtered
 36        cycle_starts = self.df[self.df['value_bool'] == True]
 37        cycle_ends = self.df[self.df['value_bool'] == False].shift(-1)
 38
 39        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)
 40
 41    def process_separate_start_end_cycle(self) -> pd.DataFrame:
 42        """Processes cycles where different variables indicate cycle start and end."""
 43        # Assuming dataframe is pre-filtered for both start_uuid and end_uuid
 44        cycle_starts = self.df[self.df['value_bool'] == True]
 45        cycle_ends = self.df[self.df['value_bool'] == True]
 46
 47        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)
 48
 49    def process_step_sequence(self, start_step: int, end_step: int) -> pd.DataFrame:
 50        """Processes cycles based on a step sequence, where specific integer values denote cycle start and end."""
 51        # Assuming dataframe is pre-filtered
 52        cycle_starts = self.df[self.df['value_integer'] == start_step]
 53        cycle_ends = self.df[self.df['value_integer'] == end_step]
 54
 55        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)
 56
 57    def process_state_change_cycle(self) -> pd.DataFrame:
 58        """Processes cycles where the start of a new cycle is the end of the previous cycle."""
 59        # Assuming dataframe is pre-filtered
 60        cycle_starts = self.df.copy()
 61        cycle_ends = self.df.shift(-1)
 62
 63        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)
 64
 65    def process_value_change_cycle(self) -> pd.DataFrame:
 66        """Processes cycles where a change in the value indicates a new cycle."""
 67        # Assuming dataframe is pre-filtered
 68    
 69        # Fill NaN or None values with appropriate defaults for diff() to work
 70        self.df['value_double'] = self.df['value_double'].fillna(0)  # Assuming numeric column
 71        self.df['value_bool'] = self.df['value_bool'].fillna(False)  # Assuming boolean column
 72        self.df['value_string'] = self.df['value_string'].fillna('')  # Assuming string column
 73        self.df['value_integer'] = self.df['value_integer'].fillna(0)  # Assuming integer column
 74    
 75        # Detect changes across the relevant columns using diff()
 76        self.df['value_change'] = (
 77            (self.df['value_double'].diff().ne(0)) |
 78            (self.df['value_bool'].diff().ne(0)) |
 79            (self.df['value_string'].shift().ne(self.df['value_string'])) |
 80            (self.df['value_integer'].diff().ne(0))
 81        )
 82
 83        # Define cycle starts and ends based on changes
 84        cycle_starts = self.df[self.df['value_change'] == True]
 85        cycle_ends = self.df[self.df['value_change'] == True].shift(-1)
 86
 87        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)
 88
 89    def _generate_cycle_dataframe(self, cycle_starts: pd.DataFrame, cycle_ends: pd.DataFrame) -> pd.DataFrame:
 90        """Generates a DataFrame with cycle start and end times."""
 91        cycle_df = pd.DataFrame(columns=['cycle_start', 'cycle_end', 'cycle_uuid'])
 92        cycle_ends_iter = iter(cycle_ends['systime'])
 93
 94        try:
 95            next_cycle_end = next(cycle_ends_iter)
 96            for _, start_row in cycle_starts.iterrows():
 97                start_time = start_row['systime']
 98                while next_cycle_end <= start_time:
 99                    next_cycle_end = next(cycle_ends_iter)
100                cycle_df.loc[len(cycle_df)] = {
101                    'cycle_start': start_time,
102                    'cycle_end': next_cycle_end,
103                    'cycle_uuid': str(uuid.uuid4())
104                }
105        except StopIteration:
106            logging.warning("Cycle end data ran out while generating cycles.")
107
108        logging.info(f"Generated {len(cycle_df)} cycles.")
109        return cycle_df
class CycleExtractor(timeseries_shaper.base.Base):
  8class CycleExtractor(Base):
  9    """Class for processing cycles based on different criteria."""
 10
 11    def __init__(self, dataframe: pd.DataFrame, start_uuid: str, end_uuid: Optional[str] = None):
 12        """Initializes the class with the data and the UUIDs for cycle start and end."""
 13        super().__init__(dataframe)
 14
 15        # Validate input types
 16        if not isinstance(dataframe, pd.DataFrame):
 17            raise ValueError("dataframe must be a pandas DataFrame")
 18        if not isinstance(start_uuid, str):
 19            raise ValueError("start_uuid must be a string")
 20        
 21        self.df = dataframe  # Use the provided DataFrame directly
 22        self.start_uuid = start_uuid
 23        self.end_uuid = end_uuid if end_uuid else start_uuid
 24        logging.info(f"CycleExtractor initialized with start_uuid: {self.start_uuid} and end_uuid: {self.end_uuid}")
 25
 26    def process_persistent_cycle(self) -> pd.DataFrame:
 27        """Processes cycles where the value of the variable stays true during the cycle."""
 28        # Assuming dataframe is pre-filtered
 29        cycle_starts = self.df[self.df['value_bool'] == True]
 30        cycle_ends = self.df[self.df['value_bool'] == False]
 31
 32        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)
 33
 34    def process_trigger_cycle(self) -> pd.DataFrame:
 35        """Processes cycles where the value of the variable goes from true to false during the cycle."""
 36        # Assuming dataframe is pre-filtered
 37        cycle_starts = self.df[self.df['value_bool'] == True]
 38        cycle_ends = self.df[self.df['value_bool'] == False].shift(-1)
 39
 40        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)
 41
 42    def process_separate_start_end_cycle(self) -> pd.DataFrame:
 43        """Processes cycles where different variables indicate cycle start and end."""
 44        # Assuming dataframe is pre-filtered for both start_uuid and end_uuid
 45        cycle_starts = self.df[self.df['value_bool'] == True]
 46        cycle_ends = self.df[self.df['value_bool'] == True]
 47
 48        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)
 49
 50    def process_step_sequence(self, start_step: int, end_step: int) -> pd.DataFrame:
 51        """Processes cycles based on a step sequence, where specific integer values denote cycle start and end."""
 52        # Assuming dataframe is pre-filtered
 53        cycle_starts = self.df[self.df['value_integer'] == start_step]
 54        cycle_ends = self.df[self.df['value_integer'] == end_step]
 55
 56        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)
 57
 58    def process_state_change_cycle(self) -> pd.DataFrame:
 59        """Processes cycles where the start of a new cycle is the end of the previous cycle."""
 60        # Assuming dataframe is pre-filtered
 61        cycle_starts = self.df.copy()
 62        cycle_ends = self.df.shift(-1)
 63
 64        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)
 65
 66    def process_value_change_cycle(self) -> pd.DataFrame:
 67        """Processes cycles where a change in the value indicates a new cycle."""
 68        # Assuming dataframe is pre-filtered
 69    
 70        # Fill NaN or None values with appropriate defaults for diff() to work
 71        self.df['value_double'] = self.df['value_double'].fillna(0)  # Assuming numeric column
 72        self.df['value_bool'] = self.df['value_bool'].fillna(False)  # Assuming boolean column
 73        self.df['value_string'] = self.df['value_string'].fillna('')  # Assuming string column
 74        self.df['value_integer'] = self.df['value_integer'].fillna(0)  # Assuming integer column
 75    
 76        # Detect changes across the relevant columns using diff()
 77        self.df['value_change'] = (
 78            (self.df['value_double'].diff().ne(0)) |
 79            (self.df['value_bool'].diff().ne(0)) |
 80            (self.df['value_string'].shift().ne(self.df['value_string'])) |
 81            (self.df['value_integer'].diff().ne(0))
 82        )
 83
 84        # Define cycle starts and ends based on changes
 85        cycle_starts = self.df[self.df['value_change'] == True]
 86        cycle_ends = self.df[self.df['value_change'] == True].shift(-1)
 87
 88        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)
 89
 90    def _generate_cycle_dataframe(self, cycle_starts: pd.DataFrame, cycle_ends: pd.DataFrame) -> pd.DataFrame:
 91        """Generates a DataFrame with cycle start and end times."""
 92        cycle_df = pd.DataFrame(columns=['cycle_start', 'cycle_end', 'cycle_uuid'])
 93        cycle_ends_iter = iter(cycle_ends['systime'])
 94
 95        try:
 96            next_cycle_end = next(cycle_ends_iter)
 97            for _, start_row in cycle_starts.iterrows():
 98                start_time = start_row['systime']
 99                while next_cycle_end <= start_time:
100                    next_cycle_end = next(cycle_ends_iter)
101                cycle_df.loc[len(cycle_df)] = {
102                    'cycle_start': start_time,
103                    'cycle_end': next_cycle_end,
104                    'cycle_uuid': str(uuid.uuid4())
105                }
106        except StopIteration:
107            logging.warning("Cycle end data ran out while generating cycles.")
108
109        logging.info(f"Generated {len(cycle_df)} cycles.")
110        return cycle_df

Class for processing cycles based on different criteria.

CycleExtractor( dataframe: pandas.core.frame.DataFrame, start_uuid: str, end_uuid: Optional[str] = None)
11    def __init__(self, dataframe: pd.DataFrame, start_uuid: str, end_uuid: Optional[str] = None):
12        """Initializes the class with the data and the UUIDs for cycle start and end."""
13        super().__init__(dataframe)
14
15        # Validate input types
16        if not isinstance(dataframe, pd.DataFrame):
17            raise ValueError("dataframe must be a pandas DataFrame")
18        if not isinstance(start_uuid, str):
19            raise ValueError("start_uuid must be a string")
20        
21        self.df = dataframe  # Use the provided DataFrame directly
22        self.start_uuid = start_uuid
23        self.end_uuid = end_uuid if end_uuid else start_uuid
24        logging.info(f"CycleExtractor initialized with start_uuid: {self.start_uuid} and end_uuid: {self.end_uuid}")

Initializes the class with the data and the UUIDs for cycle start and end.

df
start_uuid
end_uuid
def process_persistent_cycle(self) -> pandas.core.frame.DataFrame:
26    def process_persistent_cycle(self) -> pd.DataFrame:
27        """Processes cycles where the value of the variable stays true during the cycle."""
28        # Assuming dataframe is pre-filtered
29        cycle_starts = self.df[self.df['value_bool'] == True]
30        cycle_ends = self.df[self.df['value_bool'] == False]
31
32        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)

Processes cycles where the value of the variable stays true during the cycle.

def process_trigger_cycle(self) -> pandas.core.frame.DataFrame:
34    def process_trigger_cycle(self) -> pd.DataFrame:
35        """Processes cycles where the value of the variable goes from true to false during the cycle."""
36        # Assuming dataframe is pre-filtered
37        cycle_starts = self.df[self.df['value_bool'] == True]
38        cycle_ends = self.df[self.df['value_bool'] == False].shift(-1)
39
40        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)

Processes cycles where the value of the variable goes from true to false during the cycle.

def process_separate_start_end_cycle(self) -> pandas.core.frame.DataFrame:
42    def process_separate_start_end_cycle(self) -> pd.DataFrame:
43        """Processes cycles where different variables indicate cycle start and end."""
44        # Assuming dataframe is pre-filtered for both start_uuid and end_uuid
45        cycle_starts = self.df[self.df['value_bool'] == True]
46        cycle_ends = self.df[self.df['value_bool'] == True]
47
48        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)

Processes cycles where different variables indicate cycle start and end.

def process_step_sequence(self, start_step: int, end_step: int) -> pandas.core.frame.DataFrame:
50    def process_step_sequence(self, start_step: int, end_step: int) -> pd.DataFrame:
51        """Processes cycles based on a step sequence, where specific integer values denote cycle start and end."""
52        # Assuming dataframe is pre-filtered
53        cycle_starts = self.df[self.df['value_integer'] == start_step]
54        cycle_ends = self.df[self.df['value_integer'] == end_step]
55
56        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)

Processes cycles based on a step sequence, where specific integer values denote cycle start and end.

def process_state_change_cycle(self) -> pandas.core.frame.DataFrame:
58    def process_state_change_cycle(self) -> pd.DataFrame:
59        """Processes cycles where the start of a new cycle is the end of the previous cycle."""
60        # Assuming dataframe is pre-filtered
61        cycle_starts = self.df.copy()
62        cycle_ends = self.df.shift(-1)
63
64        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)

Processes cycles where the start of a new cycle is the end of the previous cycle.

def process_value_change_cycle(self) -> pandas.core.frame.DataFrame:
66    def process_value_change_cycle(self) -> pd.DataFrame:
67        """Processes cycles where a change in the value indicates a new cycle."""
68        # Assuming dataframe is pre-filtered
69    
70        # Fill NaN or None values with appropriate defaults for diff() to work
71        self.df['value_double'] = self.df['value_double'].fillna(0)  # Assuming numeric column
72        self.df['value_bool'] = self.df['value_bool'].fillna(False)  # Assuming boolean column
73        self.df['value_string'] = self.df['value_string'].fillna('')  # Assuming string column
74        self.df['value_integer'] = self.df['value_integer'].fillna(0)  # Assuming integer column
75    
76        # Detect changes across the relevant columns using diff()
77        self.df['value_change'] = (
78            (self.df['value_double'].diff().ne(0)) |
79            (self.df['value_bool'].diff().ne(0)) |
80            (self.df['value_string'].shift().ne(self.df['value_string'])) |
81            (self.df['value_integer'].diff().ne(0))
82        )
83
84        # Define cycle starts and ends based on changes
85        cycle_starts = self.df[self.df['value_change'] == True]
86        cycle_ends = self.df[self.df['value_change'] == True].shift(-1)
87
88        return self._generate_cycle_dataframe(cycle_starts, cycle_ends)

Processes cycles where a change in the value indicates a new cycle.

Inherited Members
timeseries_shaper.base.Base
dataframe
get_dataframe