Source code for vis.analyzers.experimenters.aggregator

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#--------------------------------------------------------------------------------------------------
# Program Name:           vis
# Program Description:    Helps analyze music with computers.
#
# Filename:               analyzers/experimenters/aggregator.py
# Purpose:                Aggregating experimenters.
#
# Copyright (C) 2013, 2014 Christopher Antila
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#--------------------------------------------------------------------------------------------------
"""
.. codeauthor:: Christopher Antila <christopher@antila.ca>

Aggregating experimenters.
"""

# pylint: disable=pointless-string-statement

import six
import pandas
from vis.analyzers import experimenter


class ColumnAggregator(experimenter.Experimenter):
    """
    (Arguments for the constructor are listed below).

    Experiment that aggregates data from columns of a :class:`DataFrame`, or a list of
    :class:`DataFrame` objects, by summing each row. Values from columns named ``'all'`` will not
    be included in the aggregated results. You may provide a ``'column'`` setting to guide the
    experimenter to include only certain results.

    **Example 1**

    Inputting single :class:`DataFrame` like this:

    +-------+---------+---------+
    | Index | piece_1 | piece_2 |
    +=======+=========+=========+
    | M3    | 12      | 24      |
    +-------+---------+---------+
    | m3    | NaN     | 36      |
    +-------+---------+---------+
    | P5    | 3       | 9       |
    +-------+---------+---------+

    Yields this :class:`DataFrame`:

    +-------+-------------------------------+
    | Index | 'aggregator.ColumnAggregator' |
    +=======+===============================+
    | M3    | 36                            |
    +-------+-------------------------------+
    | m3    | 36                            |
    +-------+-------------------------------+
    | P5    | 12                            |
    +-------+-------------------------------+

    **Example 2**

    Inputting two :class:`DataFrame` objects is similar.

    +-------+---------+
    | Index | piece_1 |
    +=======+=========+
    | M3    | 12      |
    +-------+---------+
    | P5    | 3       |
    +-------+---------+

    +-------+---------+
    | Index | piece_2 |
    +=======+=========+
    | M3    | 24      |
    +-------+---------+
    | m3    | 36      |
    +-------+---------+
    | P5    | 9       |
    +-------+---------+

    The result is the same :class:`DataFrame`:

    +-------+-------------------------------+
    | Index | 'aggregator.ColumnAggregator' |
    +=======+===============================+
    | M3    | 36                            |
    +-------+-------------------------------+
    | m3    | 36                            |
    +-------+-------------------------------+
    | P5    | 12                            |
    +-------+-------------------------------+

    **Example 3**

    You may also give a :class:`DataFrame` (or a list of :class:`DataFrame` objects) that have a
    :class:`pandas.MultiIndex` as produced by subclasses of
    :class:`~vis.analyzers.indexer.Indexer`. In this case, use the ``'column'`` setting to indicate
    which indexer's results you wish to aggregate.

    +-------+-----------------------------------+---------------------------------+
    |       | 'frequency.FrequencyExperimenter' | 'feelings.FeelingsExperimenter' |
    +       +---------+-------------------------+---------------+-----------------+
    | Index | '0,1'   | '1,2'                   | 'Christopher' | 'Alex'          |
    +=======+=========+=========================+===============+=================+
    | M3    | 12      | 24                      | 'delight'     | 'exuberance'    |
    +-------+---------+-------------------------+---------------+-----------------+
    | m3    | NaN     | 36                      | 'sheer joy'   | 'nonchalance'   |
    +-------+---------+-------------------------+---------------+-----------------+
    | P5    | 3       | 9                       | 'emptiness'   | 'serenity'      |
    +-------+---------+-------------------------+---------------+-----------------+

    If ``'column'`` is ``'frequency.FrequencyExperimenter'``, yet again you will have this
    :class:`DataFrame`:

    +-------+-------------------------------+
    | Index | 'aggregator.ColumnAggregator' |
    +=======+===============================+
    | M3    | 36                            |
    +-------+-------------------------------+
    | m3    | 36                            |
    +-------+-------------------------------+
    | P5    | 12                            |
    +-------+-------------------------------+
    """

    possible_settings = ['column']
    """
    :keyword str 'column': The column name to use for aggregation. The default is ``None``, which
        aggregates across all columns. If you set this to ``'all'``, it will override the default
        behaviour of not including columns called ``'all'``.
    """

    default_settings = {'column': None}

    def __init__(self, index, settings=None):
        """
        **For the __init__() Method**

        :param index: The data to aggregate. The values should be numbers.
        :type index: :class:`pandas.DataFrame` or list of :class:`pandas.DataFrame`
        :param settings: Optional dictionary with the settings described above in
            :const:`possible_settings`.
        :type settings: dict or NoneType
        """
        # Only the 'column' key is ever read from the caller's dictionary; anything else in
        # "settings" is ignored. The settings are stored before the superclass constructor runs,
        # and the superclass is deliberately given ``None`` for its own "settings" argument.
        if settings is None or 'column' not in settings:
            self._settings = {'column': ColumnAggregator.default_settings['column']}
        else:
            self._settings = {'column': settings['column']}
        super(ColumnAggregator, self).__init__(index, None)

    @staticmethod
    def _select_columns(frame, predicate):
        """
        Return the columns of ``frame`` whose label satisfies ``predicate``.

        This replaces :meth:`pandas.DataFrame.select`, which was deprecated in pandas 0.21 and
        removed in pandas 1.0. A positional boolean mask is used, so it works for plain and
        :class:`~pandas.MultiIndex` columns and with duplicate labels.

        :param frame: The object from which to select columns.
        :type frame: :class:`pandas.DataFrame`
        :param predicate: Called once with every column label (a tuple for
            :class:`~pandas.MultiIndex` columns); a truthy result keeps the column.
        :type predicate: callable
        :returns: A new :class:`DataFrame` holding only the selected columns.
        :rtype: :class:`pandas.DataFrame`
        """
        mask = [bool(predicate(label)) for label in frame.columns]
        return frame.loc[:, mask]

    def run(self):
        """
        Run the :class:`ColumnAggregator` experiment.

        :returns: A :class:`DataFrame` with a single column called
            ``'aggregator.ColumnAggregator'``. Its index is the combination of all indices of the
            provided pandas objects, and each value is the sum across the selected columns for
            that index label (missing values are skipped).
        :rtype: :class:`pandas.DataFrame`

        **Example:** ::

            import music21
            from vis.analyzers.indexers import noterest
            from vis.analyzers.experimenters import aggregator, frequency

            score = music21.converter.parse('example.xml')
            notes = noterest.NoteRestIndexer(score).run()
            freqs = frequency.FrequencyExperimenter(notes).run()
            agg = aggregator.ColumnAggregator(freqs).run()
            print(agg)
        """
        wanted = self._settings['column']

        # ensure we have a list of DataFrame
        if isinstance(self._index, pandas.DataFrame):
            frames = [self._index]
        else:
            frames = self._index

        # if there's a 'column', select it from every DataFrame
        if wanted is not None:
            def matches_wanted(column_label):
                """
                Used to select columns; automatically adjusts to select through the column label
                or the upper-most level of a MultiIndex, as required.
                """
                # MultiIndex labels arrive as tuples; every other label (strings, but also
                # numbers) is compared directly rather than being indexed.
                if isinstance(column_label, tuple):
                    return column_label[0] == wanted
                return column_label == wanted

            frames = [self._select_columns(df, matches_wanted) for df in frames]

        # unless the 'column' is 'all', de-select all the 'all' columns
        # NB: a MultiIndex label is a tuple, which never equals 'all', so this only drops columns
        #     whose plain label is 'all'
        if wanted != 'all':
            frames = [self._select_columns(df, lambda label: label != 'all') for df in frames]

        # concatenate the DataFrame objects side by side, aligning on the union of their indices
        combined = pandas.concat(frames, axis=1)

        # calculate the sum of every row, treating missing values as absent
        totals = combined.sum(axis=1, skipna=True)

        return pandas.DataFrame({'aggregator.ColumnAggregator': totals})