"""Ladybug data collection."""
from .header import Header
from .datatype import DataPoint
from collections import OrderedDict
try:
from itertools import izip as zip
except ImportError:
# python 3
xrange = range
class DataCollection(object):
    """A list of DataPoint values with an optional Header.

    Every item is validated to be a Ladybug DataPoint (duck-typed via
    the ``isDataPoint`` attribute).
    """

    __slots__ = ('_header', '_data')

    def __init__(self, data=None, header=None):
        """Init class.

        Args:
            data: A single DataPoint or an iterable of DataPoint
                (Default: None, an empty collection).
            header: A Ladybug Header or anything Header.from_header
                accepts (Default: None).
        """
        self.header = header
        if not data:
            data = []
        elif not hasattr(data, '__iter__'):
            # a single DataPoint: wrap it in a list
            assert hasattr(data, 'isDataPoint'), \
                'Expected DataPoint got {}'.format(type(data))
            data = [data]
        else:
            # materialize first: validating a one-shot iterator below would
            # consume it and silently leave the collection empty
            data = list(data)
        for d in data:
            assert hasattr(d, 'isDataPoint'), \
                'Expected DataPoint got {}'.format(type(d))
        self._data = list(data)
[docs] @classmethod
def from_json(cls, data):
"""Create a data collection from a dictionary.
Args:
{
"data": [], //An array of Ladybug data points,
"header": {} // A Ladybug header
}
"""
if 'data' not in data:
input_data = []
else:
input_data = [DataPoint.from_json(d) for d in data['data']]
if 'header' not in data:
data['header'] = {}
return cls(input_data, Header.from_json(data['header']))
[docs] @classmethod
def from_list(cls, lst, location=None, data_type=None, unit=None,
analysis_period=None):
"""Create a data collection from a list.
lst items can be DataPoint or other values.
Args:
lst: A list of data.
location: location data as a ladybug Location or location string
(Default: unknown).
data_type: Type of data (e.g. Temperature) (Default: unknown).
unit: data_type unit (Default: unknown).
analysis_period: A Ladybug analysis period (Defualt: None)
"""
header = Header(location, data_type, unit, analysis_period)
if analysis_period:
return cls.from_data_and_datetimes(lst, analysis_period.datetimes, header)
else:
data = tuple(DataPoint.from_data(d) for d in lst)
return cls(data, header)
[docs] @classmethod
def from_data_and_datetimes(cls, data, datetimes, header=None):
"""Create a list from data and dateteimes."""
_d = tuple(DataPoint(v, d) for v, d in zip(data, datetimes))
return cls(_d, header)
[docs] @classmethod
def from_data_and_analysis_period(cls, data, analysis_period, header=None):
"""Create a list from data and analysis period."""
return cls.from_data_and_datetimes(data, analysis_period.datetimes, header)
@property
def header(self):
"""Get or set header."""
return self._header
@header.setter
def header(self, h):
self._header = None if not h else Header.from_header(h)
[docs] def append(self, d):
"""Append a single item to the list."""
assert hasattr(d, 'isDataPoint'), \
'Expected DataPoint got {}'.format(type(d))
self._data.append(d)
[docs] def extend(self, new_data):
"""Extend a number of items to the end of items."""
for d in new_data:
assert hasattr(d, 'isDataPoint'), \
'Expected DataPoint got {}'.format(type(d))
self._data.extend(new_data)
[docs] def insert(self, i, d):
"""Insert an item at a given position."""
assert hasattr(d, 'isDataPoint'), \
'Expected DataPoint got {}'.format(type(d))
assert isinstance(i, int), \
'Expected Integer got {}'.format(type(i))
self._data.insert(i, d)
[docs] def pop(self, i=-1):
"""Remove the item at the given position in the data collection, and return it.
If no index is specified, a.pop() removes and returns the last
item in the list.
"""
assert isinstance(i, int), \
'Expected Integer got {}'.format(type(i))
assert i < len(self._data), \
'Item({}) is larger than the length of the data collection({})' \
.format(i, len(self._data))
return self._data.pop(i)
@property
def datetimes(self):
"""Return datetimes for this collection as a tuple."""
return tuple(value.datetime for value in self)
@property
def values(self):
"""Return the list of values."""
return self._data
[docs] def duplicate(self):
"""Duplicate current data list."""
return DataCollection(self.values, self.header)
[docs] @staticmethod
def average(data):
"""Return average value for a list of ladybug data."""
values = (value.value for value in data)
return sum(values) / len(data)
[docs] @staticmethod
def group_data_by_month(data, month_range=xrange(1, 13)):
"""Return a dictionary of values where values are grouped for each month.
Key values are between 1-12
Args:
data: A list of DataPoint to be processed
month_range: A list of numbers for months. Default is 1-12
"""
hourly_data_by_month = OrderedDict()
for m in month_range:
hourly_data_by_month[m] = []
for d in data:
try:
hourly_data_by_month[d.datetime.month].append(d)
except KeyError:
# month is not there
pass
return hourly_data_by_month
[docs] def group_by_month(self, month_range=xrange(1, 13)):
"""
Return a dictionary of values where values are grouped for each month.
Key values are between 1-12
Args:
month_range: A list of numbers for months. Default is 1-12
Usage:
epwfile = EPW("epw file address")
monthly_values = epwfile.dry_bulb_temperature.group_by_month()
print(monthly_values[2]) # returns values for the month of March
"""
return self.group_data_by_month(self.values, month_range)
[docs] @staticmethod
def group_data_by_day(data, day_range=xrange(1, 366)):
"""
Return a dictionary of values where values are grouped by each day of year.
Key values are between 1-365
Args:
data: A list of DataPoint to be processed
day_range: A list of numbers for days. Default is 1-365
"""
hourly_data_by_day = OrderedDict()
for d in day_range:
hourly_data_by_day[d] = []
for d in data:
try:
hourly_data_by_day[d.datetime.doy].append(d)
except KeyError:
# day is not there
pass
return hourly_data_by_day
[docs] def group_by_day(self, day_range=xrange(1, 366)):
"""
Return a dictionary of values where values are grouped by each day of year.
Key values are between 1-365
Args:
day_range: A list of numbers for days. Default is 1-365
user_dataList: An optional data list of DataPoint to be processed
Usage:
epwfile = EPW("epw file address")
daily_values = epwfile.dry_bulb_temperature.group_by_day(range(1, 30))
print(daily_values[2]) # returns values for the second day of year
"""
return self.group_data_by_day(self.values, day_range)
[docs] @staticmethod
def group_data_by_hour(data, hour_range=xrange(0, 24)):
"""Return a dictionary of values where values are grouped by each hour of day.
Key values are between 0-23
Args:
data: A list of DataPoint to be processed
hour_range: A list of numbers for hours. Default is 1-24
"""
hourly_data_by_hour = OrderedDict()
for h in hour_range:
hourly_data_by_hour[h] = []
for d in data:
try:
hourly_data_by_hour[d.datetime.hour].append(d)
except KeyError:
# day is not there
pass
return hourly_data_by_hour
[docs] def group_by_hour(self, hour_range=xrange(0, 24)):
"""Return a dictionary of values where values are grouped by each hour of day.
Key values are between 0-23
Args:
hour_range: A list of numbers for hours. Default is 1-24
user_dataList: An optional data list of DataPoint to be processed
Usage:
epwfile = EPW("epw file address")
monthly_values = epwfile.dry_bulb_temperature.group_by_month([1])
grouped_hourly_data = epwfile.dry_bulb_temperature.group_data_dataBy_hour(
monthly_values[1])
for hour, data in grouped_hourly_data.items():
print("average temperature values for hour {} during JAN is {} {}"
.format(hour, core._dataList.average(data), DBT.header.unit))
"""
return self.group_data_by_hour(self.values, hour_range)
[docs] def update_data_for_hours_of_year(self, values, hours_of_year):
"""Update values new set of values for a list of hours of the year.
Length of values should be equal to number of hours in hours of year.
Args:
values: A list of values to be replaced in the file
hours_of_year: A list of hoy between 1 and 8760
"""
# check length of data vs length of analysis hours_of_year
if len(values) != len(hours_of_year):
raise ValueError("Length of values %d is not equal to " +
"number of hours in analysis period %d" %
(len(values), len(hours_of_year)))
# update values
updated_count = 0
for data in self.values:
try:
# find matching index for input data
index = hours_of_year.index(data.datetime.hoy)
except ValueError:
continue
else:
# update the value
data.value = values[index]
updated_count += 1
print("%s updated for %d hour%s." %
('Values are' if len(values) > 1 else 'Value is',
updated_count,
's' if len(values) > 1 else ''))
# return self for chaining methods
return self
[docs] def update_data_for_an_hour(self, value, hour_of_year):
"""
Replace current value in data list with a new value for a specific hoy.
Args:
value: A single value
hours_of_year: The hour of the year
"""
return self.update_data_for_hours_of_year((value,), (hour_of_year,))
[docs] def update_data_for_analysis_period(self, values, analysis_period):
"""Update values with new set of values for an analysis period.
Length of values should be equal to number of hours in analysis period.
Args:
values: A list of values to be replaced in the file
analysis_period: An analysis period for input the input values.
Default is set to the whole year.
"""
return self.update_data_for_hours_of_year(values, analysis_period.hoys)
    def interpolate_data(self, timestep, cumulative=False):
        """Interpolate data for a finer timestep using a linear interpolation.

        Args:
            timestep: Target timestep as an integer. Target timestep must be
                divisible by current timestep.
            cumulative: A boolean that sets whether the interpolation
                should treat the data collection values as cumulative, in
                which case the value at each timestep is the value over
                that timestep (instead of over the hour). The default is set to
                False to yield average values in between each of the hours.

        Returns:
            A tuple of interpolated DataPoints (not a DataCollection).
        """
        assert self.header is not None, 'Header cannot be None for interpolation.'
        assert timestep % self.header.analysis_period.timestep == 0, \
            'Target timestep({}) must be divisable by current timestep({})' \
            .format(timestep, self.header.analysis_period.timestep)
        assert isinstance(cumulative, bool), \
            'Expected Boolean got {}'.format(type(cumulative))
        # minutes between two consecutive interpolated points
        _minutes_step = int(60 / int(timestep / self.header.analysis_period.timestep))
        _data_length = len(self.values)
        # generate new data: linearly blend each point toward the next one
        # via xxrange; the last interval wraps around to the first point
        # because of the modulo index
        _data = tuple(
            self[d].__class__(_v, self[d].datetime.add_minute(step * _minutes_step))
            for d in xrange(_data_length)
            for _v, step in zip(self.xxrange(self[d],
                                             self[(d + 1) % _data_length],
                                             timestep),
                                xrange(timestep))
        )
        # divide cumulative values by timestep
        # NOTE(review): _data is a tuple; only the DataPoints inside it are
        # mutated here, the container itself is never reassigned
        if cumulative is True:
            for i, d in enumerate(_data):
                _data[i].value = d.value / timestep
        # shift data if half-hour interpolation has been selected.
        # NOTE(review): `middle_hour` is read from the header but is not
        # defined in this file -- presumably a boolean flag; confirm
        if self.header.middle_hour is True:
            shift_dist = int(timestep / 2)
            _data = _data[-shift_dist:] + _data[:-shift_dist]
            # re-attach the original (unshifted) datetimes to the shifted values
            for i, d in enumerate(_data):
                _data[i - shift_dist].datetime = d.datetime
        return _data
[docs] @staticmethod
def xxrange(start, end, step_count):
"""Generate n values between start and end."""
_step = (end - start) / float(step_count)
return (start + (i * _step) for i in xrange(int(step_count)))
[docs] def filter_by_analysis_period(self, analysis_period=None):
"""
Filter a list based on an analysis period.
Args:
analysis period: A Ladybug analysis period
Return:
A new _dataList with filtered data
Usage:
# start of Feb to end of Mar
analysis_period = Analysis_period(2,1,1,3,31,24)
epw = EPW("c:/ladybug/weatherdata.epw")
DBT = epw.dry_bulb_temperature
filteredDBT = DBT.filter_by_analysis_period(analysis_period)
"""
if analysis_period.timestep != 1:
# interpolate data for smaller timestep
_int_data = self.interpolate_data(timestep=analysis_period.timestep)
# create a new header
_hea = self.header.duplicate()
_hea.analysis_period = analysis_period
_data = DataCollection(_int_data, _hea)
else:
_data = self
if not analysis_period or analysis_period.is_annual:
return _data.duplicate()
# create a new filtered_data
_filtered_data = _data.filter_by_hoys(analysis_period.hoys)
if self.header:
_filtered_data.header.analysis_period = analysis_period
return _filtered_data
[docs] def filter_by_moys(self, moys):
"""Filter the list based on a list of minutes of the year.
Args:
moys: A List of minutes of the year [0..8759 * 60]
Return:
A new _dataList with filtered data
Usage:
moys = range(0, 48 * 60) # The first two days of the year
epw = EPW("c:/ladybug/weatherdata.epw")
DBT = epw.dry_bulb_temperature
filteredDBT = DBT.filter_by_moys(moys)
"""
# There is no guarantee that data is continuous so I iterate through the
# each data point one by one
_filtered_data = [d for d in self.values if d.datetime.moy in moys]
# create a new filtered_data
if self.header:
_filteredHeader = self.header.duplicate()
_filteredHeader.analysis_period = None
return DataCollection(_filtered_data, _filteredHeader)
else:
return DataCollection(_filtered_data)
[docs] def filter_by_hoys(self, hoys):
"""Filter the list based on an analysis period.
Args:
hoys: A List of hours of the year 0..8759
Return:
A new _dataList with filtered data
Usage:
hoys = range(1,48) # The first two days of the year
epw = EPW("c:/ladybug/weatherdata.epw")
DBT = epw.dry_bulb_temperature
filteredDBT = DBT.filter_by_hoys(hoys)
"""
_moys = tuple(int(hour * 60) for hour in hoys)
return self.filter_by_moys(_moys)
    def filter_by_conditional_statement(self, statement):
        """Filter the list based on a conditional statement.

        Args:
            statement: A conditional statement as a string (e.g. x>25 and x%5==0).
                The variable should always be named as x

        Return:
            A new _dataList with filtered data

        Usage:
            epw = EPW("c:/ladybug/weatherdata.epw")
            DBT = epw.dry_bulb_temperature
            # filter data for when dry bulb temperature is more then 25
            filtered_DBT = DBT.filter_by_conditional_statement('x > 25')
            # get the list of time stamps that meet the conditional statement
            print(filtered_DBT.time_stamps)
        """
        def check_input_statement(statement):
            # strip boolean keywords, then require that the only remaining
            # alphabetic character is the variable name 'x'
            st_statement = statement.lower() \
                .replace("and", "").replace("or", "") \
                .replace("not", "").replace("in", "").replace("is", "")
            parsed_st = [s for s in st_statement if s.isalpha()]
            if list(set(parsed_st)) != ['x']:
                statement_error_msg = 'Invalid input statement. ' + \
                    'Statement should be a valid Python statement' + \
                    ' and the variable should be named as x'
                raise ValueError(statement_error_msg)

        check_input_statement(statement)
        # substitute the user variable with the DataPoint value expression
        statement = statement.replace('x', 'd.value')
        # SECURITY NOTE(review): eval executes arbitrary expressions; the
        # check above only restricts letters to 'x' — do not pass
        # untrusted input to this method
        _filtered_data = [d for d in self.values if eval(statement)]
        # create a new filtered_data; it no longer matches the original
        # analysis period
        if self.header:
            _filteredHeader = self.header.duplicate()
            _filteredHeader.analysis_period = None
            return DataCollection(_filtered_data, _filteredHeader)
        else:
            return DataCollection(_filtered_data)
[docs] def filter_by_pattern(self, pattern):
"""Filter the list based on a list of Boolean.
Length of Boolean should be equal to length of values in _dataList
Args:
pattern: A list of True, False values
Return:
A new _dataList with filtered data
"""
try:
_len = len(pattern)
except TypeError:
raise ValueError("pattern should be a list of values.")
_filtered_data = [d for count, d in enumerate(self.values)
if pattern[count % _len]]
# create a new filtered_data
if self.header:
_filteredHeader = self.header.duplicate()
_filteredHeader.analysis_period = None
return DataCollection(_filtered_data, _filteredHeader)
else:
return DataCollection(_filtered_data)
[docs] def average_data_monthly(self, data):
"""Return a dictionary of values for average values for available months."""
# group data for each month
monthly_values = self.group_data_by_month(data)
average_values = OrderedDict()
# average values for each month
for month, values in monthly_values.items():
average_values[month] = self.average(values)
return average_values
[docs] def average_data(self):
"""Return average value for data collection."""
return self.average(self.values)
[docs] def average_monthly(self):
"""Return a dictionary of values for average values for available months."""
return self.average_data_monthly(self.values)
[docs] def average_data_monthly_for_each_hour(self, data):
"""Calculate average value for each hour during each month.
This method returns a dictionary with nested dictionaries for each hour
"""
# get monthy values
monthly_hourly_values = self.group_data_by_month(data)
# group data for each hour in each month and collect them in a dictionary
averaged_monthly_values_per_hour = OrderedDict()
for month, monthly_values in monthly_hourly_values.items():
if month not in averaged_monthly_values_per_hour:
averaged_monthly_values_per_hour[month] = OrderedDict()
# group data for each hour
grouped_hourly_data = self.group_data_by_hour(monthly_values)
for hour, data in grouped_hourly_data.items():
averaged_monthly_values_per_hour[month][hour] = self.average(data)
return averaged_monthly_values_per_hour
[docs] def average_monthly_for_each_hour(self):
"""Calculate average value for each hour during each month.
This method returns a dictionary with nested dictionaries for each hour
"""
return self.average_data_monthly_for_each_hour(self.values)
    def __len__(self):
        # number of DataPoints in the collection
        return len(self._data)

    def __getitem__(self, key):
        # index / slice access delegates to the underlying list
        return self._data[key]

    def __setitem__(self, key, value):
        # direct item assignment is intentionally disabled; values must be
        # changed through the update_data_* methods
        raise TypeError('Use update_data_for_an_hour to set the values.')

    def __delitem__(self, key):
        del self._data[key]

    def __iter__(self):
        return iter(self._data)

    def __reversed__(self):
        return reversed(self._data)

    def __contains__(self, item):
        return item in self._data
[docs] def to_json(self):
"""Convert data collection to a dictionary."""
return {
'data': [d.to_json() for d in self._data],
'header': self.header.to_json() if self.header else {}
}
[docs] def ToString(self):
"""Overwrite .NET ToString method."""
return self.__repr__()
def __repr__(self):
"""_data collection representation."""
if self.header and self.header.data_type:
return "{}: #{}".format(self.header.data_type, len(self._data))
else:
return "DataCollection: #{}".format(len(self._data))