Source code for ladybug.datacollection

"""Ladybug data collection."""
from .header import Header
from .datatype import DataPoint

from collections import OrderedDict

try:
    from itertools import izip as zip
except ImportError:
    # python 3
    xrange = range


class DataCollection(object):
    """A list of data with a header."""

    __slots__ = ('_header', '_data')

    def __init__(self, data=None, header=None):
        """Init class."""
        self.header = header

        if not data:
            data = []
        elif not hasattr(data, '__iter__'):
            assert hasattr(data, 'isDataPoint'), \
                'Expected DataPoint got {}'.format(type(data))
            data = [data]

        for d in data:
            assert hasattr(d, 'isDataPoint'), \
                'Expected DataPoint got {}'.format(type(d))

        self._data = list(data)

    @classmethod
    def from_json(cls, data):
        """Create a data collection from a dictionary.

        Args:
            {
                "data": [],  // An array of Ladybug data points
                "header": {}  // A Ladybug header
            }
        """
        if 'data' not in data:
            input_data = []
        else:
            input_data = [DataPoint.from_json(d) for d in data['data']]

        if 'header' not in data:
            data['header'] = {}

        return cls(input_data, Header.from_json(data['header']))

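    # A minimal sketch of the expected input (not part of the original source;
    # the contents of "data" would be DataPoint.to_json() dictionaries and
    # "header" a Header.to_json() dictionary, both of which are optional):
    #
    #     sample = {"data": [], "header": {}}
    #     empty_collection = DataCollection.from_json(sample)
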
    @classmethod
    def from_list(cls, lst, location=None, data_type=None, unit=None,
                  analysis_period=None):
        """Create a data collection from a list.

        lst items can be DataPoint or other values.

        Args:
            lst: A list of data.
            location: location data as a ladybug Location or location string
                (Default: unknown).
            data_type: Type of data (e.g. Temperature) (Default: unknown).
            unit: data_type unit (Default: unknown).
            analysis_period: A Ladybug analysis period (Default: None).
        """
        header = Header(location, data_type, unit, analysis_period)
        if analysis_period:
            return cls.from_data_and_datetimes(lst, analysis_period.datetimes, header)
        else:
            data = tuple(DataPoint.from_data(d) for d in lst)
            return cls(data, header)

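    # Usage sketch (illustrative values; 'Temperature' and 'C' are assumed
    # inputs, not defaults from this module):
    #
    #     dc = DataCollection.from_list([20, 21, 22], data_type='Temperature',
    #                                   unit='C')
    #     print(len(dc))            # 3
    #     print(dc.average_data())  # 21.0, assuming DataPoint stores the number
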
    @classmethod
    def from_data_and_datetimes(cls, data, datetimes, header=None):
        """Create a data collection from data and datetimes."""
        _d = tuple(DataPoint(v, d) for v, d in zip(data, datetimes))
        return cls(_d, header)

    @classmethod
    def from_data_and_analysis_period(cls, data, analysis_period, header=None):
        """Create a data collection from data and an analysis period."""
        return cls.from_data_and_datetimes(data, analysis_period.datetimes, header)

    @property
    def header(self):
        """Get or set header."""
        return self._header

    @header.setter
    def header(self, h):
        self._header = None if not h else Header.from_header(h)

    def append(self, d):
        """Append a single item to the list."""
        assert hasattr(d, 'isDataPoint'), \
            'Expected DataPoint got {}'.format(type(d))
        self._data.append(d)

    def extend(self, new_data):
        """Extend the collection by appending the items from new_data."""
        for d in new_data:
            assert hasattr(d, 'isDataPoint'), \
                'Expected DataPoint got {}'.format(type(d))
        self._data.extend(new_data)

    def insert(self, i, d):
        """Insert an item at a given position."""
        assert hasattr(d, 'isDataPoint'), \
            'Expected DataPoint got {}'.format(type(d))
        assert isinstance(i, int), \
            'Expected Integer got {}'.format(type(i))
        self._data.insert(i, d)

    def pop(self, i=-1):
        """Remove the item at the given position in the data collection, and return it.

        If no index is specified, pop() removes and returns the last item
        in the data collection.
        """
        assert isinstance(i, int), \
            'Expected Integer got {}'.format(type(i))
        assert i < len(self._data), \
            'Index ({}) is larger than the length of the data collection ({}).' \
            .format(i, len(self._data))
        return self._data.pop(i)

    @property
    def datetimes(self):
        """Return datetimes for this collection as a tuple."""
        return tuple(value.datetime for value in self)

    @property
    def values(self):
        """Return the list of values."""
        return self._data

    def duplicate(self):
        """Duplicate the current data collection."""
        return DataCollection(self.values, self.header)

    @staticmethod
    def average(data):
        """Return the average value for a list of ladybug data."""
        values = (value.value for value in data)
        return sum(values) / len(data)

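    # Sketch of combining grouping and averaging (illustrative; the epw path
    # is hypothetical and mirrors the Usage notes elsewhere in this module):
    #
    #     epw = EPW("c:/ladybug/weatherdata.epw")
    #     january_values = epw.dry_bulb_temperature.group_by_month([1])[1]
    #     print(DataCollection.average(january_values))
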
    @staticmethod
    def group_data_by_month(data, month_range=xrange(1, 13)):
        """Return a dictionary of values where values are grouped for each month.

        Key values are between 1-12.

        Args:
            data: A list of DataPoint to be processed.
            month_range: A list of numbers for months. Default is 1-12.
        """
        hourly_data_by_month = OrderedDict()
        for m in month_range:
            hourly_data_by_month[m] = []

        for d in data:
            try:
                hourly_data_by_month[d.datetime.month].append(d)
            except KeyError:
                # month is not in month_range
                pass

        return hourly_data_by_month

    def group_by_month(self, month_range=xrange(1, 13)):
        """Return a dictionary of values where values are grouped for each month.

        Key values are between 1-12.

        Args:
            month_range: A list of numbers for months. Default is 1-12.

        Usage:

            epwfile = EPW("epw file address")
            monthly_values = epwfile.dry_bulb_temperature.group_by_month()
            print(monthly_values[2])  # returns values for the month of February
        """
        return self.group_data_by_month(self.values, month_range)

    @staticmethod
    def group_data_by_day(data, day_range=xrange(1, 366)):
        """Return a dictionary of values where values are grouped by each day of year.

        Key values are between 1-365.

        Args:
            data: A list of DataPoint to be processed.
            day_range: A list of numbers for days. Default is 1-365.
        """
        hourly_data_by_day = OrderedDict()
        for d in day_range:
            hourly_data_by_day[d] = []

        for d in data:
            try:
                hourly_data_by_day[d.datetime.doy].append(d)
            except KeyError:
                # day is not in day_range
                pass

        return hourly_data_by_day

    def group_by_day(self, day_range=xrange(1, 366)):
        """Return a dictionary of values where values are grouped by each day of year.

        Key values are between 1-365.

        Args:
            day_range: A list of numbers for days. Default is 1-365.

        Usage:

            epwfile = EPW("epw file address")
            daily_values = epwfile.dry_bulb_temperature.group_by_day(range(1, 30))
            print(daily_values[2])  # returns values for the second day of year
        """
        return self.group_data_by_day(self.values, day_range)

    @staticmethod
    def group_data_by_hour(data, hour_range=xrange(0, 24)):
        """Return a dictionary of values where values are grouped by each hour of day.

        Key values are between 0-23.

        Args:
            data: A list of DataPoint to be processed.
            hour_range: A list of numbers for hours. Default is 0-23.
        """
        hourly_data_by_hour = OrderedDict()
        for h in hour_range:
            hourly_data_by_hour[h] = []

        for d in data:
            try:
                hourly_data_by_hour[d.datetime.hour].append(d)
            except KeyError:
                # hour is not in hour_range
                pass

        return hourly_data_by_hour

    def group_by_hour(self, hour_range=xrange(0, 24)):
        """Return a dictionary of values where values are grouped by each hour of day.

        Key values are between 0-23.

        Args:
            hour_range: A list of numbers for hours. Default is 0-23.

        Usage:

            epwfile = EPW("epw file address")
            monthly_values = epwfile.dry_bulb_temperature.group_by_month([1])
            grouped_hourly_data = DataCollection.group_data_by_hour(monthly_values[1])
            for hour, data in grouped_hourly_data.items():
                print("average temperature value for hour {} during JAN is {} {}"
                      .format(hour, DataCollection.average(data),
                              epwfile.dry_bulb_temperature.header.unit))
        """
        return self.group_data_by_hour(self.values, hour_range)

    def update_data_for_hours_of_year(self, values, hours_of_year):
        """Update values with a new set of values for a list of hours of the year.

        Length of values should be equal to the number of hours in hours_of_year.

        Args:
            values: A list of values to be replaced in the file.
            hours_of_year: A list of hoys between 1 and 8760.
        """
        # check length of values vs length of hours_of_year
        if len(values) != len(hours_of_year):
            raise ValueError(
                "Length of values (%d) is not equal to the number of hours "
                "of the year (%d)." % (len(values), len(hours_of_year)))

        # update values
        updated_count = 0
        for data in self.values:
            try:
                # find matching index for input data
                index = hours_of_year.index(data.datetime.hoy)
            except ValueError:
                continue
            else:
                # update the value
                data.value = values[index]
                updated_count += 1

        print("%s updated for %d hour%s." % (
            'Values are' if len(values) > 1 else 'Value is',
            updated_count,
            's' if updated_count != 1 else ''))

        # return self for chaining methods
        return self

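    # Usage sketch (assumed values, not from the original source): replace the
    # values for the first three hours of the year on an annual hourly
    # collection, then keep chaining on the returned self.
    #
    #     dc.update_data_for_hours_of_year([18.5, 18.0, 17.5], [1, 2, 3])
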
    def update_data_for_an_hour(self, value, hour_of_year):
        """Replace the current value in the data collection with a new value for a specific hoy.

        Args:
            value: A single value.
            hour_of_year: The hour of the year.
        """
        return self.update_data_for_hours_of_year((value,), (hour_of_year,))

    def update_data_for_analysis_period(self, values, analysis_period):
        """Update values with a new set of values for an analysis period.

        Length of values should be equal to the number of hours in the analysis period.

        Args:
            values: A list of values to be replaced in the file.
            analysis_period: An analysis period for the input values.
                Default is set to the whole year.
        """
        return self.update_data_for_hours_of_year(values, analysis_period.hoys)

    def interpolate_data(self, timestep, cumulative=False):
        """Interpolate data for a finer timestep using a linear interpolation.

        Args:
            timestep: Target timestep as an integer. Target timestep must be
                divisible by the current timestep.
            cumulative: A boolean that sets whether the interpolation should
                treat the data collection values as cumulative, in which case
                the value at each timestep is the value over that timestep
                (instead of over the hour). The default is set to False to
                yield average values in between each of the hours.
        """
        assert self.header is not None, 'Header cannot be None for interpolation.'
        assert timestep % self.header.analysis_period.timestep == 0, \
            'Target timestep ({}) must be divisible by the current timestep ({}).' \
            .format(timestep, self.header.analysis_period.timestep)
        assert isinstance(cumulative, bool), \
            'Expected Boolean got {}'.format(type(cumulative))

        _minutes_step = int(60 / int(timestep / self.header.analysis_period.timestep))
        _data_length = len(self.values)

        # generate new data
        _data = tuple(
            self[d].__class__(_v, self[d].datetime.add_minute(step * _minutes_step))
            for d in xrange(_data_length)
            for _v, step in zip(self.xxrange(self[d], self[(d + 1) % _data_length],
                                             timestep),
                                xrange(timestep))
        )

        # divide cumulative values by the timestep
        if cumulative is True:
            for i, d in enumerate(_data):
                _data[i].value = d.value / timestep

        # shift data if half-hour interpolation has been selected.
        if self.header.middle_hour is True:
            shift_dist = int(timestep / 2)
            _data = _data[-shift_dist:] + _data[:-shift_dist]
            for i, d in enumerate(_data):
                _data[i - shift_dist].datetime = d.datetime

        return _data

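    # Usage sketch (assumed hourly input, not from the original source):
    # interpolate an hourly collection to a 15-minute timestep. With
    # cumulative=True each interpolated value is additionally divided by the
    # timestep, which suits data such as rainfall where the hourly value is a
    # total rather than an average.
    #
    #     quarter_hour_data = dc.interpolate_data(timestep=4)
    #     quarter_hour_rain = rain_dc.interpolate_data(timestep=4, cumulative=True)
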
    @staticmethod
    def xxrange(start, end, step_count):
        """Generate step_count values between start and end."""
        _step = (end - start) / float(step_count)
        return (start + (i * _step) for i in xrange(int(step_count)))

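    # Worked example (illustrative): xxrange(0, 10, 4) yields 0.0, 2.5, 5.0
    # and 7.5 -- step_count evenly spaced values that start at `start` and
    # exclude `end`.
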
    def filter_by_analysis_period(self, analysis_period=None):
        """Filter the data collection based on an analysis period.

        Args:
            analysis_period: A Ladybug analysis period.

        Return:
            A new DataCollection with filtered data.

        Usage:

            # start of Feb to end of Mar
            analysis_period = AnalysisPeriod(2, 1, 1, 3, 31, 24)
            epw = EPW("c:/ladybug/weatherdata.epw")
            DBT = epw.dry_bulb_temperature
            filteredDBT = DBT.filter_by_analysis_period(analysis_period)
        """
        if not analysis_period:
            return self.duplicate()

        if analysis_period.timestep != 1:
            # interpolate data for the finer timestep
            _int_data = self.interpolate_data(timestep=analysis_period.timestep)
            # create a new header
            _hea = self.header.duplicate()
            _hea.analysis_period = analysis_period
            _data = DataCollection(_int_data, _hea)
        else:
            _data = self

        if analysis_period.is_annual:
            return _data.duplicate()

        # create a new filtered_data
        _filtered_data = _data.filter_by_hoys(analysis_period.hoys)
        if self.header:
            _filtered_data.header.analysis_period = analysis_period

        return _filtered_data

    def filter_by_moys(self, moys):
        """Filter the data collection based on a list of minutes of the year.

        Args:
            moys: A list of minutes of the year [0..8759 * 60].

        Return:
            A new DataCollection with filtered data.

        Usage:

            moys = range(0, 48 * 60)  # The first two days of the year
            epw = EPW("c:/ladybug/weatherdata.epw")
            DBT = epw.dry_bulb_temperature
            filteredDBT = DBT.filter_by_moys(moys)
        """
        # There is no guarantee that the data is continuous, so iterate through
        # the data points one by one.
        _filtered_data = [d for d in self.values if d.datetime.moy in moys]

        # create a new filtered_data
        if self.header:
            _filteredHeader = self.header.duplicate()
            _filteredHeader.analysis_period = None
            return DataCollection(_filtered_data, _filteredHeader)
        else:
            return DataCollection(_filtered_data)

    def filter_by_hoys(self, hoys):
        """Filter the data collection based on a list of hours of the year.

        Args:
            hoys: A list of hours of the year [0..8759].

        Return:
            A new DataCollection with filtered data.

        Usage:

            hoys = range(1, 48)  # The first two days of the year
            epw = EPW("c:/ladybug/weatherdata.epw")
            DBT = epw.dry_bulb_temperature
            filteredDBT = DBT.filter_by_hoys(hoys)
        """
        _moys = tuple(int(hour * 60) for hour in hoys)
        return self.filter_by_moys(_moys)

    def filter_by_conditional_statement(self, statement):
        """Filter the data collection based on a conditional statement.

        Args:
            statement: A conditional statement as a string (e.g. 'x > 25 and x % 5 == 0').
                The variable should always be named x.

        Return:
            A new DataCollection with filtered data.

        Usage:

            epw = EPW("c:/ladybug/weatherdata.epw")
            DBT = epw.dry_bulb_temperature
            # filter data for when dry bulb temperature is more than 25
            filtered_DBT = DBT.filter_by_conditional_statement('x > 25')
            # get the list of time stamps that meet the conditional statement
            print(filtered_DBT.time_stamps)
        """
        def check_input_statement(statement):
            st_statement = statement.lower() \
                .replace("and", "").replace("or", "") \
                .replace("not", "").replace("in", "").replace("is", "")

            parsed_st = [s for s in st_statement if s.isalpha()]
            if list(set(parsed_st)) != ['x']:
                statement_error_msg = 'Invalid input statement. ' + \
                    'Statement should be a valid Python statement' + \
                    ' and the variable should be named as x'
                raise ValueError(statement_error_msg)

        check_input_statement(statement)

        statement = statement.replace('x', 'd.value')
        _filtered_data = [d for d in self.values if eval(statement)]

        # create a new filtered_data
        if self.header:
            _filteredHeader = self.header.duplicate()
            _filteredHeader.analysis_period = None
            return DataCollection(_filtered_data, _filteredHeader)
        else:
            return DataCollection(_filtered_data)

    def filter_by_pattern(self, pattern):
        """Filter the data collection based on a list of Booleans.

        Length of the Boolean list should be equal to the length of values
        in the data collection.

        Args:
            pattern: A list of True/False values.

        Return:
            A new DataCollection with filtered data.
        """
        try:
            _len = len(pattern)
        except TypeError:
            raise ValueError("pattern should be a list of values.")

        _filtered_data = [d for count, d in enumerate(self.values)
                          if pattern[count % _len]]

        # create a new filtered_data
        if self.header:
            _filteredHeader = self.header.duplicate()
            _filteredHeader.analysis_period = None
            return DataCollection(_filtered_data, _filteredHeader)
        else:
            return DataCollection(_filtered_data)

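    # Usage sketch (assumed pattern): keep every other value. The pattern is
    # applied cyclically (count % len(pattern)), so a short pattern repeats
    # over the whole collection.
    #
    #     every_other = dc.filter_by_pattern([True, False])
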
    def average_data_monthly(self, data):
        """Return a dictionary of average values for each available month."""
        # group data for each month
        monthly_values = self.group_data_by_month(data)

        average_values = OrderedDict()

        # average values for each month
        for month, values in monthly_values.items():
            average_values[month] = self.average(values)

        return average_values

    def average_data(self):
        """Return the average value for the data collection."""
        return self.average(self.values)

    def average_monthly(self):
        """Return a dictionary of average values for each available month."""
        return self.average_data_monthly(self.values)

    def average_data_monthly_for_each_hour(self, data):
        """Calculate the average value for each hour of each month.

        This method returns a dictionary with a nested dictionary of hourly
        averages for each month.
        """
        # get monthly values
        monthly_hourly_values = self.group_data_by_month(data)

        # group data for each hour in each month and collect them in a dictionary
        averaged_monthly_values_per_hour = OrderedDict()
        for month, monthly_values in monthly_hourly_values.items():
            if month not in averaged_monthly_values_per_hour:
                averaged_monthly_values_per_hour[month] = OrderedDict()

            # group data for each hour
            grouped_hourly_data = self.group_data_by_hour(monthly_values)
            for hour, data in grouped_hourly_data.items():
                averaged_monthly_values_per_hour[month][hour] = self.average(data)

        return averaged_monthly_values_per_hour

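    # Sketch of the returned structure (illustrative): an OrderedDict keyed by
    # month (1-12) whose values are OrderedDicts keyed by hour (0-23), so
    # result[1][14] would be the January average at hour 14, assuming the
    # collection has data for that month and hour.
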
    def average_monthly_for_each_hour(self):
        """Calculate the average value for each hour of each month.

        This method returns a dictionary with a nested dictionary of hourly
        averages for each month.
        """
        return self.average_data_monthly_for_each_hour(self.values)

    def __len__(self):
        return len(self._data)

    def __getitem__(self, key):
        return self._data[key]

    def __setitem__(self, key, value):
        raise TypeError('Use update_data_for_an_hour to set the values.')

    def __delitem__(self, key):
        del self._data[key]

    def __iter__(self):
        return iter(self._data)

    def __reversed__(self):
        return reversed(self._data)

    def __contains__(self, item):
        return item in self._data

    def to_json(self):
        """Convert the data collection to a dictionary."""
        return {
            'data': [d.to_json() for d in self._data],
            'header': self.header.to_json() if self.header else {}
        }

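    # Round-trip sketch (illustrative, assuming DataPoint and Header also
    # round-trip through their own to_json/from_json): the output of to_json
    # can be fed back into from_json to rebuild an equivalent collection.
    #
    #     dc_copy = DataCollection.from_json(dc.to_json())
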
    def ToString(self):
        """Overwrite .NET ToString method."""
        return self.__repr__()

    def __repr__(self):
        """Data collection representation."""
        if self.header and self.header.data_type:
            return "{}: #{}".format(self.header.data_type, len(self._data))
        else:
            return "DataCollection: #{}".format(len(self._data))