matplotlib:绘制时间序列时跳过没有数据的时段

时间:2016-01-03 19:15:36

标签: python matplotlib plot time-series timeserieschart

tl; dr:如何在绘制时间序列时跳过没有数据的时段?

我正在进行长时间的计算,我想监控其进度。有时我打断这个计算。日志存储在一个巨大的CSV文件中,如下所示:

2016-01-03T01:36:30.958199,0,0,0,startup
2016-01-03T01:36:32.363749,10000,0,0,regular
...
2016-01-03T11:12:21.082301,51020000,13402105,5749367,regular
2016-01-03T11:12:29.065687,51030000,13404142,5749367,regular
2016-01-03T11:12:37.657022,51040000,13408882,5749367,regular
2016-01-03T11:12:54.236950,51050000,13412824,5749375,shutdown
2016-01-03T19:02:38.293681,51050000,13412824,5749375,startup
2016-01-03T19:02:49.296161,51060000,13419181,5749377,regular
2016-01-03T19:03:00.547644,51070000,13423127,5749433,regular
2016-01-03T19:03:05.599515,51080000,13427189,5750183,regular
...

实际上,有41列。每列都是进度的某个指标。第二列总是以10000为步长递增。最后一列是不言自明的。

我想在同一图表上绘制每一列,同时跳过“关闭”和“启动”之间的时间段。理想情况下,我还想在每个跳过上画一条垂直线。

这是我到目前为止所得到的:

import matplotlib.pyplot as plt
import pandas as pd

# < ... reading my CSV in a Pandas dataframe `df` ... >

# A single figure/axes pair that receives one line per progress column.
fig, ax = plt.subplots()

# 'total' followed by the forty zero-padded column names '00' .. '39'.
column_names = ['total']
column_names.extend('%02d' % number for number in range(40))

# Every line is drawn against the same timestamps (the frame's index).
timestamps = df.index.values
for column_name in column_names:
    ax.plot_date(timestamps, df[column_name].values, '-')

# Let matplotlib lay out the date tick labels, then open the window.
fig.autofmt_xdate()
plt.show()

so far

我想摆脱那个漫长的平坦期,而只是画一条垂直线。

我知道df.plot(),但根据我的经验,它有问题(此外,Pandas会把datetime对象转换成它自己的格式,而不是使用date2num和num2date)。

看起来可能的解决方案是写一个custom scaler,但这看起来很复杂。

据我了解,编写自定义Locator只会改变刻度的位置(小垂直线和相关标签),但不会改变绘图本身的位置。这是对的吗?

UPD:一个简单的解决方案是更改时间戳(比如说,重新计算它们为“自开始以来经过的时间”),但我更愿意保留它们。

UPD: https://stackoverflow.com/a/5657491/1214547的答案适用于我,并进行了一些修改。我很快就会写出我的解决方案。

2 个答案:

答案 0 :(得分:1)

这是一个适合我的解决方案。它不能很好地处理位置相近的断点(刻度标签可能会过于拥挤),但在我的情况下这并不重要。

import bisect
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.scale as mscale
import matplotlib.transforms as mtransforms
import matplotlib.dates as mdates
import pandas as pd

# heavily borrows from http://stackoverflow.com/a/5657491/1214547

def _collapse_breaks(values, breaks):
    """Map real axis positions onto the axis with the breaks cut out.

    Every ``(left, right)`` interval in *breaks* is squeezed onto the single
    point ``left``, and everything to the right of it moves left by the
    interval's width.  Each element is handled on its own, so *values* may be
    unsorted and of any shape.  The breaks are expected not to overlap.
    """
    values = np.asarray(values, dtype=float)
    removed = np.zeros_like(values)
    for left, right in breaks:
        # Part of this break that lies to the left of each value: nothing
        # before the break, its full width after it, a fraction inside it.
        removed += np.clip(values - left, 0.0, right - left)
    return values - removed


def _expand_breaks(values, breaks):
    """Map positions on the collapsed axis back to real axis positions.

    The forward mapping glues a whole break onto one point, so it is not
    exactly invertible; the glued point is mapped back to the *left* edge of
    its break.  Each element is handled on its own, which also covers the
    length-1 arrays matplotlib passes when it converts a cursor position.
    *breaks* is expected to be sorted ascending and non-overlapping.
    """
    values = np.asarray(values, dtype=float)
    lefts = np.array([left for left, _ in breaks], dtype=float)
    widths = np.array([right - left for left, right in breaks], dtype=float)
    # shift[k] is the total width of the first k breaks.
    shift = np.concatenate(([0.0], np.cumsum(widths)))
    # Where each break's left edge sits on the collapsed axis.
    collapsed_lefts = lefts - shift[:-1]
    # Number of breaks strictly to the left of each value; a value sitting
    # exactly on a collapsed break is not counted, so it stays on the left.
    crossed = np.searchsorted(collapsed_lefts, values, side='left')
    return values + shift[crossed]


def CustomScaleFactory(breaks):
    """Build a matplotlib scale class that removes *breaks* from a date axis.

    *breaks* is a sequence of ``(left, right)`` pairs in matplotlib date
    numbers.  The returned class is registered under the scale name
    ``'custom'``; its transform collapses every break to a single point, and
    its default locator places hourly ticks outside the breaks plus one tick
    at the right edge of every break.
    """
    class CustomScale(mscale.ScaleBase):
        name = 'custom'

        def __init__(self, axis, **kwargs):
            # Newer matplotlib releases require the axis argument here,
            # while old ones define no ScaleBase.__init__ and reject it.
            try:
                mscale.ScaleBase.__init__(self, axis)
            except TypeError:
                mscale.ScaleBase.__init__(self)

        def get_transform(self):
            return self.CustomTransform()

        def set_default_locators_and_formatters(self, axis):
            class HourSkippingLocator(mdates.HourLocator):
                """Hour locator that never puts a tick inside a break."""
                _breaks = breaks

                def _tick_allowed(self, tick):
                    return not any(left <= tick <= right
                                   for left, right in self._breaks)

                def __call__(self):
                    ticks = super(HourSkippingLocator, self).__call__()
                    ticks = [tick for tick in ticks if self._tick_allowed(tick)]
                    # Label the moment at which data resumes after each break.
                    ticks.extend(right for (left, right) in self._breaks)
                    return ticks

            axis.set_major_locator(HourSkippingLocator(interval=3))
            # '%b' is the documented directive for the abbreviated month name;
            # the former '%h' is an alias that is not available everywhere.
            axis.set_major_formatter(mdates.DateFormatter("%b %d, %H:%M"))

        class CustomTransform(mtransforms.Transform):
            """Forward transform: real date numbers -> collapsed axis."""
            input_dims = 1
            output_dims = 1
            is_separable = True
            has_inverse = True
            _breaks = breaks

            def __init__(self):
                mtransforms.Transform.__init__(self)

            def transform_non_affine(self, a):
                return _collapse_breaks(a, self._breaks)

            def inverted(self):
                return CustomScale.InvertedCustomTransform()

        class InvertedCustomTransform(mtransforms.Transform):
            """Inverse transform: collapsed axis -> real date numbers."""
            input_dims = 1
            output_dims = 1
            is_separable = True
            has_inverse = True
            _breaks = breaks

            def __init__(self):
                mtransforms.Transform.__init__(self)

            def transform_non_affine(self, a):
                return _expand_breaks(a, self._breaks)

            def inverted(self):
                return CustomScale.CustomTransform()

    return CustomScale


# < ... reading my CSV in a Pandas dataframe `df` ... >

# Row positions of the log entries that open and close each run.
kind_column = df['kind']
startups = np.where(kind_column == 'startup')[0]
shutdowns = np.where(kind_column == 'shutdown')[0]

# A break runs from a shutdown to the next startup.  The first startup is
# skipped -- presumably because the log opens with one, so it follows no
# shutdown.
breaks_idx = list(zip(shutdowns, startups[1:]))
breaks_dates = []
for stop_row, start_row in breaks_idx:
    breaks_dates.append((df.index[stop_row], df.index[start_row]))
breaks = []
for stop_date, start_date in breaks_dates:
    breaks.append((mdates.date2num(stop_date), mdates.date2num(start_date)))

fig, ax = plt.subplots()

column_names = ['total']
column_names.extend('%02d' % number for number in range(40))
for column_name in column_names:
    ax.plot_date(df.index.values, df[column_name].values, '-')

# shame on matplotlib: there is no way to unregister a scale
scale_class = CustomScaleFactory(breaks)
mscale.register_scale(scale_class)
ax.set_xscale('custom')

# One vertical line per break, drawn at the moment the run resumed and
# reaching from zero up to the 'total' value logged at that moment.
vlines_x = [resume for (_, resume) in breaks]
vlines_ymin = np.zeros(len(vlines_x))
vlines_ymax = [df.iloc[start_row]['total'] for (_, start_row) in breaks_idx]
plt.vlines(vlines_x, vlines_ymin, vlines_ymax, color='darkgrey')

fig.autofmt_xdate()
plt.ticklabel_format(axis='y', style='plain')

plt.show()

result

答案 1 :(得分:1)

@Pastafarianist提供了一个很好的解决方案。但是,当我处理含有多个断点的图时,我在InvertedCustomTransform中发现了一个错误。例如,在下面的代码中,十字光标在越过第二个和第三个断点之后就无法再跟随鼠标移动。

import bisect
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.scale as mscale
import matplotlib.transforms as mtransforms
import matplotlib.dates as mdates
import pandas as pd
from matplotlib.widgets import Cursor


def CustomScaleFactory(breaks):
    """Build a matplotlib scale class that removes *breaks* from a date axis.

    *breaks* is a sorted sequence of ``(left, right)`` pairs in matplotlib
    date numbers.  The returned class is registered under the scale name
    ``'custom'``.

    This listing reproduces the inverse-transform bug discussed in the
    surrounding text, so ``InvertedCustomTransform.transform_non_affine`` is
    intentionally left in its faulty form.
    """
    class CustomScale(mscale.ScaleBase):
        name = 'custom'

        def __init__(self, axis, **kwargs):
            # Newer matplotlib releases require the axis argument here,
            # while old ones define no ScaleBase.__init__ and reject it.
            try:
                mscale.ScaleBase.__init__(self, axis)
            except TypeError:
                mscale.ScaleBase.__init__(self)

        def get_transform(self):
            return self.CustomTransform()

        def set_default_locators_and_formatters(self, axis):
            class HourSkippingLocator(mdates.HourLocator):
                """Hour locator that never puts a tick inside a break."""
                _breaks = breaks

                def __init__(self, *args, **kwargs):
                    super(HourSkippingLocator, self).__init__(*args, **kwargs)

                def _tick_allowed(self, tick):
                    for left, right in self._breaks:
                        if left <= tick <= right:
                            return False
                    return True

                def __call__(self):
                    ticks = super(HourSkippingLocator, self).__call__()
                    ticks = [tick for tick in ticks if self._tick_allowed(tick)
                             ]
                    # Label the moment at which data resumes after each break.
                    ticks.extend(right for (left, right) in self._breaks)
                    return ticks

            axis.set_major_locator(HourSkippingLocator(interval=3))
            # '%b' is the documented directive for the abbreviated month name;
            # the former '%h' is an alias that is not available everywhere.
            axis.set_major_formatter(mdates.DateFormatter("%b %d, %H:%M"))

        class CustomTransform(mtransforms.Transform):
            """Forward transform: real date numbers -> collapsed axis."""
            input_dims = 1
            output_dims = 1
            is_separable = True
            has_inverse = True
            _breaks = breaks

            def __init__(self):
                mtransforms.Transform.__init__(self)

            def transform_non_affine(self, a):
                # I have tried to write something smart using np.cumsum(),
                # but failed, since it was too complicated to handle the
                # transformation for points within breaks.
                # On the other hand, these loops are very easily translated
                # in plain C.
                # The single pass below relies on `a` being sorted ascending.

                result = np.empty_like(a)

                a_idx = 0
                csum = 0  # total width of the breaks already passed
                for left, right in self._breaks:
                    # Points before this break only shift by earlier breaks.
                    while a_idx < len(a) and a[a_idx] < left:
                        result[a_idx] = a[a_idx] - csum
                        a_idx += 1
                    # Points inside the break all land on its left edge.
                    while a_idx < len(a) and a[a_idx] <= right:
                        result[a_idx] = left - csum
                        a_idx += 1
                    csum += right - left

                # Points after the last break.
                while a_idx < len(a):
                    result[a_idx] = a[a_idx] - csum
                    a_idx += 1

                return result

            def inverted(self):
                return CustomScale.InvertedCustomTransform()

        class InvertedCustomTransform(mtransforms.Transform):
            """Inverse transform: collapsed axis -> real date numbers."""
            input_dims = 1
            output_dims = 1
            is_separable = True
            has_inverse = True
            _breaks = breaks

            def __init__(self):
                mtransforms.Transform.__init__(self)

            def transform_non_affine(self, a):
                # Actually, this transformation isn't exactly invertible.
                # It may glue together some points, and there is no way
                # to separate them back. This implementation maps both
                # points to the *left* side of the break.
                #
                # NOTE: deliberately kept faulty -- this is the bug the
                # listing demonstrates.  When several breaks resolve to the
                # same `pos` (always the case for a length-1 `a`), each
                # assignment to diff[pos] overwrites the previous one, so
                # only the last break's width ends up being added.

                diff = np.zeros(len(a))

                total_shift = 0

                for left, right in self._breaks:
                    pos = bisect.bisect_right(a, left - total_shift)
                    if pos >= len(diff):
                        break
                    diff[pos] = right - left
                    total_shift += right - left

                return a + diff.cumsum()

            def inverted(self):
                return CustomScale.CustomTransform()

    return CustomScale

# Simulated data: on each of two days, ten samples starting at 9:30 and ten
# samples ending at 15:00, all 30 seconds apart.
index1 = pd.date_range(start='2016-01-08 9:30', periods=10, freq='30s')
index2 = pd.date_range(end='2016-01-08 15:00', periods=10, freq='30s')
index = index1.union(index2)
data1 = pd.Series(range(20), index=index.values)
index3 = pd.date_range(start='2016-01-09 9:30', periods=10, freq='30s')
index4 = pd.date_range(end='2016-01-09 15:00', periods=10, freq='30s')
index = index3.union(index4)
data2 = pd.Series(range(20), index=index.values)
data = pd.concat([data1, data2])

# Consecutive pairs are the (left, right) edges of the three breaks.
# `pd.datetime` no longer exists in current pandas, so the timestamps are
# parsed with pd.Timestamp and handed to matplotlib as plain datetimes.
breaks_dates = [
    pd.Timestamp('2016-01-08 09:35:00').to_pydatetime(),
    pd.Timestamp('2016-01-08 14:55:00').to_pydatetime(),
    pd.Timestamp('2016-01-08 15:00:00').to_pydatetime(),
    pd.Timestamp('2016-01-09 09:30:00').to_pydatetime(),
    pd.Timestamp('2016-01-09 09:35:00').to_pydatetime(),
    pd.Timestamp('2016-01-09 14:55:00').to_pydatetime(),
]
breaks_dates = [mdates.date2num(point_i) for point_i in breaks_dates]
breaks = [(breaks_dates[i], breaks_dates[i + 1]) for i in [0, 2, 4]]
fig, ax = plt.subplots()
ax.plot(data.index.values, data.values)
mscale.register_scale(CustomScaleFactory(breaks))
ax.set_xscale('custom')
# Keep a reference to the widget so it is not garbage-collected.
cursor = Cursor(ax, useblit=True, color='r', linewidth=2)
plt.show()

(此处原有一张图片:enter image description here)

如果按照以下方式更改 `InvertedCustomTransform` 类中的 `transform_non_affine` 函数,则效果很好。

def transform_non_affine(self, a):
    """Map positions on the collapsed axis back to real axis positions.

    Actually, this transformation isn't exactly invertible: the forward
    transform glues a whole break onto one point, and there is no way to
    separate those points again.  This implementation maps the glued point
    to the *left* side of the break.

    Every element of *a* is converted independently, so the result is the
    same whether matplotlib passes the whole axis or a length-1 array (as it
    does for cursor positions), and *a* does not have to be sorted.
    ``self._breaks`` is expected to be sorted ascending and non-overlapping.
    """
    a = np.asarray(a, dtype=float)
    lefts = np.array([left for left, _ in self._breaks], dtype=float)
    widths = np.array([right - left for left, right in self._breaks],
                      dtype=float)
    # total_shift[k] is the combined width of the first k breaks.
    total_shift = np.concatenate(([0.0], np.cumsum(widths)))
    # Position of each break's left edge on the collapsed axis.
    collapsed_lefts = lefts - total_shift[:-1]
    # Number of breaks strictly to the left of each value; a value sitting
    # exactly on a collapsed break is not counted, so it stays on the left.
    crossed = np.searchsorted(collapsed_lefts, a, side='left')
    return a + total_shift[crossed]

原因可能是转换方法的输入'a'不是整个轴,它只是一个长度为1的numpy.array。