Skip to content

ts_shape.transform.time_functions.timezone_shift ¤

Classes:

  • TimezoneShift

    A class for shifting timestamps in a DataFrame to a different timezone, with methods to handle timezone localization and conversion.

TimezoneShift ¤

TimezoneShift(dataframe: DataFrame, column_name: str = 'systime')

Bases: Base

A class for shifting timestamps in a DataFrame to a different timezone, with methods to handle timezone localization and conversion.

Parameters:

  • dataframe ¤

    (DataFrame) –

    The DataFrame to be processed.

  • column_name ¤

    (str, default: 'systime' ) –

    The column to sort by. Default is 'systime'. If the column is not found, the class falls back to the columns whose names contain 'time' or 'date', converts them to datetime, and sorts by the first one.

Methods:

Source code in src/ts_shape/utils/base.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
def __init__(self, dataframe: pd.DataFrame, column_name: str = 'systime') -> None:
    """
    Initializes the Base with a copy of a DataFrame, converts its time column(s) to datetime,
    and sorts the copy by the specified column (or by the first detected time column).

    Args:
        dataframe (pd.DataFrame): The DataFrame to be processed. It is copied, so the
            object passed in by the caller is not modified.
        column_name (str): The column to sort by. Default is 'systime'. If the column is
            not present, every column whose label contains 'time' or 'date'
            (case-insensitive) is converted to datetime and the first of them is used
            for sorting. If no such column exists, the data is left unsorted.
    """
    self.dataframe = dataframe.copy()

    if column_name in self.dataframe.columns:
        # errors='coerce' turns unparseable values into NaT instead of raising.
        self.dataframe[column_name] = pd.to_datetime(self.dataframe[column_name], errors='coerce')
    else:
        # Column labels are not guaranteed to be strings (e.g. integer labels),
        # so go through str() before the substring test.
        time_columns = [
            col for col in self.dataframe.columns
            if 'time' in str(col).lower() or 'date' in str(col).lower()
        ]

        for col in time_columns:
            self.dataframe[col] = pd.to_datetime(self.dataframe[col], errors='coerce')

        # Sort by the first detected time column, if there is one.
        if time_columns:
            column_name = time_columns[0]

    # Sort only when the (specified or detected) column actually exists.
    if column_name in self.dataframe.columns:
        self.dataframe = self.dataframe.sort_values(by=column_name)

add_timezone_column classmethod ¤

add_timezone_column(dataframe: DataFrame, time_column: str, input_timezone: str, target_timezone: str) -> DataFrame

Creates a new column with timestamps converted from an input timezone to a target timezone, without altering the original column.

Parameters:

  • dataframe ¤

    (DataFrame) –

    The DataFrame containing the data.

  • time_column ¤

    (str) –

    The name of the time column to convert.

  • input_timezone ¤

    (str) –

    The timezone of the input timestamps.

  • target_timezone ¤

    (str) –

    The target timezone.

Returns:

  • DataFrame

    pd.DataFrame: A DataFrame with an additional column for the shifted timezone.

Source code in src/ts_shape/transform/time_functions/timezone_shift.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
@classmethod
def add_timezone_column(cls, dataframe: pd.DataFrame, time_column: str, input_timezone: str, target_timezone: str) -> pd.DataFrame:
    """
    Returns a copy of the DataFrame with one extra column holding the timestamps of
    `time_column` shifted from `input_timezone` to `target_timezone`.

    The extra column is named `<time_column>_<target_timezone>`, with every '/' in the
    timezone name replaced by '_'. The source column is left as it was.

    Args:
        dataframe (pd.DataFrame): The DataFrame containing the data.
        time_column (str): The name of the time column to convert.
        input_timezone (str): The timezone of the input timestamps.
        target_timezone (str): The target timezone.

    Returns:
        pd.DataFrame: A DataFrame with an additional column for the shifted timezone.
    """
    shifted_name = "{}_{}".format(time_column, target_timezone.replace('/', '_'))

    # Work on a copy so the caller's frame is not touched.
    result = dataframe.copy()
    result[shifted_name] = result[time_column]

    # The actual conversion is delegated to shift_timezone, applied to the new column only.
    return cls.shift_timezone(result, shifted_name, input_timezone, target_timezone)

calculate_time_difference classmethod ¤

calculate_time_difference(dataframe: DataFrame, start_column: str, end_column: str) -> Series

Calculates the time difference between two timestamp columns.

Parameters:

  • dataframe ¤

    (DataFrame) –

    The DataFrame containing the data.

  • start_column ¤

    (str) –

    The name of the start time column.

  • end_column ¤

    (str) –

    The name of the end time column.

Returns:

  • Series

    pd.Series: A Series with the time differences in seconds.

Source code in src/ts_shape/transform/time_functions/timezone_shift.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
@classmethod
def calculate_time_difference(cls, dataframe: pd.DataFrame, start_column: str, end_column: str) -> pd.Series:
    """
    Computes `end_column - start_column` row by row and returns the result in seconds.

    Args:
        dataframe (pd.DataFrame): The DataFrame containing the data.
        start_column (str): The name of the start time column.
        end_column (str): The name of the end time column.

    Returns:
        pd.Series: A Series with the time differences in seconds.

    Raises:
        ValueError: If one column is timezone-aware and the other is timezone-naive.
    """
    starts = dataframe[start_column]
    starts_aware = starts.dt.tz is not None
    ends = dataframe[end_column]
    ends_aware = ends.dt.tz is not None

    # Mixing aware and naive timestamps is rejected up front.
    if starts_aware != ends_aware:
        raise ValueError("Both columns must be either timezone-aware or timezone-naive.")

    if starts_aware:
        # Bring both sides to UTC before subtracting.
        starts = starts.dt.tz_convert('UTC')
        ends = ends.dt.tz_convert('UTC')

    elapsed = ends - starts
    return elapsed.dt.total_seconds()

detect_timezone_awareness classmethod ¤

detect_timezone_awareness(dataframe: DataFrame, time_column: str) -> bool

Detects if a time column in a DataFrame is timezone-aware.

Parameters:

  • dataframe ¤

    (DataFrame) –

    The DataFrame containing the data.

  • time_column ¤

    (str) –

    The name of the time column to check.

Returns:

  • bool ( bool ) –

    True if the column is timezone-aware, False otherwise.

Source code in src/ts_shape/transform/time_functions/timezone_shift.py
83
84
85
86
87
88
89
90
91
92
93
94
95
@classmethod
def detect_timezone_awareness(cls, dataframe: pd.DataFrame, time_column: str) -> bool:
    """
    Reports whether a datetime column carries timezone information.

    Args:
        dataframe (pd.DataFrame): The DataFrame containing the data.
        time_column (str): The name of the time column to check.

    Returns:
        bool: True if the column is timezone-aware, False otherwise.
    """
    # `.dt.tz` is None for timezone-naive datetime columns.
    column_tz = dataframe[time_column].dt.tz
    return column_tz is not None

get_dataframe ¤

get_dataframe() -> DataFrame

Returns the processed DataFrame.

Source code in src/ts_shape/utils/base.py
34
35
36
def get_dataframe(self) -> pd.DataFrame:
    """
    Returns the processed DataFrame.

    Returns:
        pd.DataFrame: The DataFrame stored on this instance (the same object, not a copy).
    """
    processed = self.dataframe
    return processed

list_available_timezones classmethod ¤

list_available_timezones() -> list

Returns a list of all available timezones.

Returns:

  • list ( list ) –

    A list of strings representing all available timezones.

Source code in src/ts_shape/transform/time_functions/timezone_shift.py
73
74
75
76
77
78
79
80
81
@classmethod
def list_available_timezones(cls) -> list:
    """
    Lists every timezone name known to pytz.

    Returns:
        list: A list of strings representing all available timezones.
    """
    # Hand back pytz's own collection unchanged.
    available = pytz.all_timezones
    return available

revert_to_original_timezone classmethod ¤

revert_to_original_timezone(dataframe: DataFrame, time_column: str, original_timezone: str) -> DataFrame

Reverts a timezone-shifted time column back to the original timezone.

Parameters:

  • dataframe ¤

    (DataFrame) –

    The DataFrame containing the data.

  • time_column ¤

    (str) –

    The name of the time column to revert.

  • original_timezone ¤

    (str) –

    The original timezone to revert to.

Returns:

  • DataFrame

    pd.DataFrame: A DataFrame with timestamps reverted to the original timezone.

Source code in src/ts_shape/transform/time_functions/timezone_shift.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
@classmethod
def revert_to_original_timezone(cls, dataframe: pd.DataFrame, time_column: str, original_timezone: str) -> pd.DataFrame:
    """
    Converts a timezone-aware time column back to the given original timezone.

    The column is overwritten on the DataFrame that was passed in, and that same
    DataFrame is returned.

    Args:
        dataframe (pd.DataFrame): The DataFrame containing the data.
        time_column (str): The name of the time column to revert.
        original_timezone (str): The original timezone to revert to.

    Returns:
        pd.DataFrame: A DataFrame with timestamps reverted to the original timezone.

    Raises:
        ValueError: If `original_timezone` is not one of pytz's known timezones.
    """
    is_known = original_timezone in pytz.all_timezones
    if not is_known:
        raise ValueError(f"Invalid original timezone: {original_timezone}")

    # tz_convert needs timezone-aware data; pandas rejects tz-naive columns here.
    reverted = dataframe[time_column].dt.tz_convert(original_timezone)
    dataframe[time_column] = reverted

    return dataframe

shift_timezone classmethod ¤

shift_timezone(dataframe: DataFrame, time_column: str, input_timezone: str, target_timezone: str) -> DataFrame

Shifts timestamps in the specified column of a DataFrame from a given timezone to a target timezone.

Parameters:

  • dataframe ¤

    (DataFrame) –

    The DataFrame containing the data.

  • time_column ¤

    (str) –

    The name of the time column to convert.

  • input_timezone ¤

    (str) –

    The timezone of the input timestamps (e.g., 'UTC' or 'America/New_York').

  • target_timezone ¤

    (str) –

    The target timezone to shift to (e.g., 'America/New_York').

Returns:

  • DataFrame

    pd.DataFrame: A DataFrame with timestamps converted to the target timezone.

Source code in src/ts_shape/transform/time_functions/timezone_shift.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
@classmethod
def shift_timezone(cls, dataframe: pd.DataFrame, time_column: str, input_timezone: str, target_timezone: str) -> pd.DataFrame:
    """
    Shifts timestamps in the specified column of a DataFrame from a given timezone to a target timezone.

    Timezone-naive timestamps are first localized to `input_timezone`. Timestamps that are
    already timezone-aware keep their instants and are converted directly to
    `target_timezone`; `input_timezone` is validated but does not change the result for them.

    The column is overwritten on the DataFrame that was passed in, and that same
    DataFrame is returned. The column is assigned only once, after every conversion
    has succeeded, so a failing conversion leaves it unchanged.

    Args:
        dataframe (pd.DataFrame): The DataFrame containing the data.
        time_column (str): The name of the time column to convert.
        input_timezone (str): The timezone of the input timestamps (e.g., 'UTC' or 'America/New_York').
        target_timezone (str): The target timezone to shift to (e.g., 'America/New_York').

    Returns:
        pd.DataFrame: A DataFrame with timestamps converted to the target timezone.

    Raises:
        ValueError: If either timezone is not one of pytz's known timezones, or if the
            column does not have a datetime dtype.
    """
    # Validate timezones
    if input_timezone not in pytz.all_timezones:
        raise ValueError(f"Invalid input timezone: {input_timezone}")
    if target_timezone not in pytz.all_timezones:
        raise ValueError(f"Invalid target timezone: {target_timezone}")

    # Ensure the time column is in datetime format
    timestamps = dataframe[time_column]
    if not pd.api.types.is_datetime64_any_dtype(timestamps):
        raise ValueError(f"Column '{time_column}' must contain datetime values.")

    # Naive timestamps have to be given a timezone before they can be converted.
    # Aware timestamps already identify exact instants, so they go straight to
    # the target timezone without an intermediate conversion.
    if timestamps.dt.tz is None:
        timestamps = timestamps.dt.tz_localize(input_timezone)

    dataframe[time_column] = timestamps.dt.tz_convert(target_timezone)

    return dataframe