# Source code for vis.analyzers.experimenters.aggregator
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#--------------------------------------------------------------------------------------------------
# Program Name: vis
# Program Description: Helps analyze music with computers.
#
# Filename: analyzers/experimenters/aggregator.py
# Purpose: Aggregating experimenters.
#
# Copyright (C) 2013, 2014 Christopher Antila
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#--------------------------------------------------------------------------------------------------
"""
.. codeauthor:: Christopher Antila <christopher@antila.ca>
Aggregating experimenters.
"""
# pylint: disable=pointless-string-statement
import six
import pandas
from vis.analyzers import experimenter
class ColumnAggregator(experimenter.Experimenter):
    """
    (Arguments for the constructor are listed below).

    Experiment that aggregates data from columns of a :class:`DataFrame`, or a list of
    :class:`DataFrame` objects, by summing each row. Values from columns named ``'all'`` will not
    be included in the aggregated results. You may provide a ``'column'`` setting to guide the
    experimenter to include only certain results.

    **Example 1**

    Inputting single :class:`DataFrame` like this:

    +-------+---------+---------+
    | Index | piece_1 | piece_2 |
    +=======+=========+=========+
    | M3    | 12      | 24      |
    +-------+---------+---------+
    | m3    | NaN     | 36      |
    +-------+---------+---------+
    | P5    | 3       | 9       |
    +-------+---------+---------+

    Yields this :class:`DataFrame`:

    +-------+-------------------------------+
    | Index | 'aggregator.ColumnAggregator' |
    +=======+===============================+
    | M3    | 36                            |
    +-------+-------------------------------+
    | m3    | 36                            |
    +-------+-------------------------------+
    | P5    | 12                            |
    +-------+-------------------------------+

    **Example 2**

    Inputting two :class:`DataFrame` objects is similar.

    +-------+---------+
    | Index | piece_1 |
    +=======+=========+
    | M3    | 12      |
    +-------+---------+
    | P5    | 3       |
    +-------+---------+

    +-------+---------+
    | Index | piece_2 |
    +=======+=========+
    | M3    | 24      |
    +-------+---------+
    | m3    | 36      |
    +-------+---------+
    | P5    | 9       |
    +-------+---------+

    The result is the same :class:`DataFrame`:

    +-------+-------------------------------+
    | Index | 'aggregator.ColumnAggregator' |
    +=======+===============================+
    | M3    | 36                            |
    +-------+-------------------------------+
    | m3    | 36                            |
    +-------+-------------------------------+
    | P5    | 12                            |
    +-------+-------------------------------+

    **Example 3**

    You may also give a :class:`DataFrame` (or a list of :class:`DataFrame` objects) that have a
    :class:`pandas.MultiIndex` as produced by subclasses of :class:`~vis.analyzers.indexer.Indexer`.
    In this case, use the ``'column'`` setting to indicate which indexer's results you wish to
    aggregate.

    +-------+-----------------------------------+---------------------------------+
    |       | 'frequency.FrequencyExperimenter' | 'feelings.FeelingsExperimenter' |
    +       +---------+-------------------------+---------------+-----------------+
    | Index | '0,1'   | '1,2'                   | 'Christopher' | 'Alex'          |
    +=======+=========+=========================+===============+=================+
    | M3    | 12      | 24                      | 'delight'     | 'exuberance'    |
    +-------+---------+-------------------------+---------------+-----------------+
    | m3    | NaN     | 36                      | 'sheer joy'   | 'nonchalance'   |
    +-------+---------+-------------------------+---------------+-----------------+
    | P5    | 3       | 9                       | 'emptiness'   | 'serenity'      |
    +-------+---------+-------------------------+---------------+-----------------+

    If ``'column'`` is ``'frequency.FrequencyExperimenter'``, yet again you will have this
    :class:`DataFrame`:

    +-------+-------------------------------+
    | Index | 'aggregator.ColumnAggregator' |
    +=======+===============================+
    | M3    | 36                            |
    +-------+-------------------------------+
    | m3    | 36                            |
    +-------+-------------------------------+
    | P5    | 12                            |
    +-------+-------------------------------+
    """

    possible_settings = ['column']
    """
    :keyword str 'column': The column name to use for aggregation. The default is ``None``, which
        aggregates across all columns. If you set this to ``'all'``, it will override the default
        behaviour of not including columns called ``'all'``.
    """

    default_settings = {'column': None}

    def __init__(self, index, settings=None):
        """
        **For the __init__() Method**

        :param index: The data to aggregate. The values should be numbers.
        :type index: :class:`pandas.DataFrame` or list of :class:`pandas.DataFrame`
        :param settings: Optional dictionary with the settings described above in
            :const:`possible_settings`.
        :type settings: dict or NoneType
        """
        # Only the 'column' key is read; any other key in ``settings`` is ignored.
        if settings is not None and 'column' in settings:
            column = settings['column']
        else:
            column = ColumnAggregator.default_settings['column']
        self._settings = {'column': column}
        # The settings are stored above, before the parent constructor runs; the parent
        # receives ``None`` for its own settings argument.
        super(ColumnAggregator, self).__init__(index, None)

    @staticmethod
    def _keep_columns(frame, predicate):
        """
        Return the columns of ``frame`` whose label satisfies ``predicate``.

        This stands in for :meth:`DataFrame.select`, which is no longer available in current
        versions of pandas.

        :param frame: The object to filter.
        :type frame: :class:`pandas.DataFrame`
        :param predicate: Called once with every column label (a tuple when the columns are a
            :class:`pandas.MultiIndex`); a truthy return value keeps the column.
        :type predicate: callable
        :returns: A :class:`DataFrame` holding only the columns that were kept.
        :rtype: :class:`pandas.DataFrame`
        """
        mask = [bool(predicate(label)) for label in frame.columns]
        return frame.loc[:, mask]

    def run(self):
        """
        Run the :class:`ColumnAggregator` experiment.

        :returns: A :class:`DataFrame` with a single column, ``'aggregator.ColumnAggregator'``.
            Its index is the combination of all indices of the provided pandas objects, and each
            value is the sum of all (non-NaN) values in that row of the pandas objects.
        :rtype: :class:`pandas.DataFrame`

        **Example:** ::

            import music21
            from vis.analyzers.indexers import noterest
            from vis.analyzers.experimenters import aggregator, frequency
            score = music21.converter.parse('example.xml')
            notes = noterest.NoteRestIndexer(score).run()
            freqs = frequency.FrequencyExperimenter(notes).run()
            agg = aggregator.ColumnAggregator(freqs).run()
            print(agg)
        """
        # ensure we have a list of DataFrame
        if isinstance(self._index, pandas.DataFrame):
            frames = [self._index]
        else:
            frames = list(self._index)

        wanted = self._settings['column']

        # if there's a 'column', select it from every DataFrame
        if wanted is not None:
            def matches_wanted(label):
                """
                Used to select columns; compares the upper-most level of a MultiIndex label (a
                tuple), or the label itself for ordinary columns, with the 'column' setting.
                """
                top_level = label[0] if isinstance(label, tuple) else label
                return top_level == wanted
            frames = [self._keep_columns(frame, matches_wanted) for frame in frames]

        # unless the 'column' is 'all', de-select all the 'all' columns
        if wanted != 'all':
            # NOTE: a MultiIndex label is a tuple, which never equals the string 'all', so this
            # filter only removes ordinary (single-level) columns labelled 'all'.
            frames = [self._keep_columns(frame, lambda label: label != 'all')
                      for frame in frames]

        # concatenate the DataFrame objects side by side, aligning them on the union of indices
        combined = pandas.concat(frames, axis=1)

        # calculate the sum of every row; NaN (missing) values are skipped
        totals = combined.sum(axis=1, skipna=True)
        return pandas.DataFrame({'aggregator.ColumnAggregator': totals})