Source code for vis.analyzers.indexers.repeat

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#--------------------------------------------------------------------------------------------------
# Program Name:           vis
# Program Description:    Helps analyze music with computers.
#
# Filename:               controllers/indexers/repeat.py
# Purpose:                Indexers that somehow consider repetition.
#
# Copyright (C) 2013, 2014 Christopher Antila
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#--------------------------------------------------------------------------------------------------
"""
.. codeauthor:: Christopher Antila <christopher@antila.ca>

Indexers that consider repetition in any way.
"""

from numpy import nan
import pandas
from vis.analyzers import indexer


[docs]class FilterByRepeatIndexer(indexer.Indexer): """ If the same event occurs many times in a row, remove all occurrences but the one with the \ lowest ``offset`` value (i.e., the "first" event). Because of how a :class:`DataFrame`'s index works, many of the events that would have been filtered will instead be replaced with :const:`numpy.NaN`. Please be careful that the behaviour of this indexer matches your expectations. """ required_score_type = 'pandas.Series' def __init__(self, score, settings=None): """ :param score: The indices from which to remove consecutive identical events. There must be at least one part in the score. :type score: :class:`pandas.DataFrame` or list of :class:`pandas.Series` :param settings: This indexer uses no settings, so this is ignored. :type settings: dict or NoneType :raises: :exc:`RuntimeError` if ``score`` is the wrong type. :raises: :exc:`RuntimeError` if ``score`` is not a list of the same types. """ super(FilterByRepeatIndexer, self).__init__(score, None) # This Indexer uses pandas magic, not an _indexer_func(). self._indexer_func = None
[docs] def run(self): """ Make a new index of the piece, removing any event that is identical to the preceding. :returns: A :class:`DataFrame` of the new indices. :rtype: :class:`pandas.DataFrame` """ # I'm relying on pandas' efficiency. In the future, maybe we should use multiprocessing? post = [] for part in self._score: if len(part.index) < 2: post.append(part) continue axe_me = [] prev_off = None for offset in list(part.index): if prev_off is None: pass # prevent the other tests from being tried elif part[offset] == part[prev_off]: axe_me.append(offset) prev_off = offset for axed in axe_me: part[axed] = nan post.append(part.dropna()) # prepare the proper return type combinations = [[x] for x in xrange(len(self._score))] return self.make_return([unicode(x)[1:-1] for x in combinations], post)