Question

我一直在尝试学习HMM，因此挑了一个有趣的问题来动手研究。简单来说，我有一些带时间戳的地理坐标，想在已观察到的历史行为的基础上，根据当前位置预测最可能的下一个位置。我使用python中的hmmlearn来实现。下面是目前的代码；这几天我一直在参照教程边改边调试，所以代码有些粗糙：

from common import load_file_forcaster
import numpy as np
from sklearn.model_selection import train_test_split
from hmmlearn.hmm import GaussianHMM

class Forecaster:
    """Fit a Gaussian HMM to time-ordered location points and rank next-location candidates.

    The loaded data is split (optionally without shuffling) into a training
    part used by ``fit_data`` and a held-out part used as the recent history
    in ``find_single_likly``.
    """

    def __init__(self, file_name, n_hidden_states=4, test_size_mult=0.5, shuf=False, preprocess=True, latency_days=5):
        """Load the data file, optionally sort it by time, and split it.

        Args:
            file_name: Path handed to ``load_file_forcaster``.
            n_hidden_states: Number of hidden states of the Gaussian HMM.
            test_size_mult: ``test_size`` forwarded to ``train_test_split``.
            shuf: ``shuffle`` flag forwarded to ``train_test_split``.
            preprocess: When true, sort the points by their timestamps.
            latency_days: Length of the history window used for prediction.
        """
        loaded = load_file_forcaster(file_name)

        if preprocess:
            times, points = self.preprocess(loaded)
        else:
            # preprocess() reads loaded[0] as the points and loaded[1] as the
            # times and returns them as (times, points); mirror that order
            # here so _X/_Y mean the same thing on both paths.
            # NOTE(review): assumes load_file_forcaster returns (points, times),
            # as preprocess() implies - confirm against common.py.
            times, points = np.asarray(loaded[1]), np.asarray(loaded[0])

        # Timestamps, in sorted order of occurrence when preprocessing ran.
        self._X = times

        # Location points (x, y, z per row), aligned with self._X.
        self._Y = points

        self._train_data, self._test_data = train_test_split(
            self._Y, test_size=test_size_mult, shuffle=shuf)

        self._hmm = GaussianHMM(n_components=n_hidden_states)
        self.n_latency_days = latency_days

    def test_data(self):
        """Return the held-out part of the location points."""
        return self._test_data

    def fit_data(self):
        """Fit the HMM on the first three columns of the training points."""
        feature_vector = np.column_stack(
            [self._train_data[:, axis] for axis in range(3)])
        self._hmm.fit(feature_vector)

    def preprocess(self, tupe):
        """Sort the points by timestamp.

        Args:
            tupe: Pair whose first item is the 2-D collection of points and
                whose second item is the matching sequence of timestamps.

        Returns:
            ``(ordered_times, pnts_sorted)`` - note the order is swapped
            relative to the input pair.
        """
        order = np.argsort(tupe[1])
        pnts_sorted = np.array(tupe[0])[order, :]
        # Index with the same permutation so times and points stay paired.
        ordered_times = np.asarray(tupe[1])[order]
        print('Shape ' + str(pnts_sorted.shape))
        print('Ordered_times ' + str(list(zip(ordered_times, pnts_sorted))))
        return (ordered_times, pnts_sorted)

    def find_single_likly(self, day_index=10):
        """Pick the known location that best continues the recent history.

        Every row of ``self._Y`` is appended in turn to the history window
        taken from the held-out data, and the resulting sequence is scored by
        the fitted HMM. The row with the highest score wins.

        Args:
            day_index: Position in the held-out data the window is anchored to.

        Returns:
            ``(previous_data, final_score, most_probable_outcome)`` where
            ``final_score`` is ``exp`` of the winning log-likelihood.
        """
        previous_data_start_index = max(0, day_index - self.n_latency_days)
        previous_data_end_index = max(0, day_index - 1)
        # NOTE(review): the slice end is exclusive, so rows day_index - 1 and
        # day_index are not part of the history - confirm this gap is intended.
        previous_data = self._test_data[previous_data_start_index:previous_data_end_index]

        # Rank on the raw log-likelihoods. Exponentiating only the negative
        # ones (as was done before) mixes two scales and breaks the argmax,
        # and exp() of very negative values underflows to 0.0, creating ties.
        log_likelihoods = [
            self._hmm.score(np.vstack((previous_data, location)))
            for location in self._Y
        ]

        best_index = int(np.argmax(log_likelihoods))
        most_probable_outcome = self._Y[best_index]
        # Converted once, for reporting only; for a continuous model this is
        # a likelihood (density) and may exceed 1.
        final_score = np.exp(log_likelihoods[best_index])

        return (previous_data, final_score, most_probable_outcome)

def main():
    """Fit a Forecaster on 'geo_small_csv.txt' and print a prediction per held-out index from 8 on."""
    print('Starting')
    forecaster = Forecaster('geo_small_csv.txt')
    forecaster.fit_data()

    held_out = forecaster.test_data()
    n_held_out = len(held_out)

    for day in range(8, n_held_out):
        history, score, guess = forecaster.find_single_likly(day)

        print('Predicted for ' + str(history) + ' is probability ' + str(score) + ' with answer ' + str(guess))

        # Only compare against the following row when one exists.
        following = day + 1
        if following < n_held_out:
            actual = held_out[following]
            print('Next outcome is ' + str(actual) + ' match is ' + str(actual == guess))

    print('Done')

# Run the demo only when this file is executed as a script, not on import.
if __name__== '__main__':
    main()

此外，此存储库包含我一直在使用的代码和数据： https://github.com/joshu0991/geo

我观察到的现象是：评估模型时得到的概率非常小，而且对下一个位置的预测几乎从不正确。我的问题是，我的方法看起来正确吗？具体来说，我使用3d向量作为数据，因为我用下面的公式把纬度和经度转换到了3d空间：

x = cos(lat) * cos(long)
y = sin(lat) * sin(long)
z = sin(lat)

如果这本身不算严重的错误，那么我还有哪里明显做错了吗？根据我读过的几篇论文，我认为HMM是为人类移动行为建模的合适选择，特别是《Next Place Prediction using Mobility Markov Chains》（它提出的方法不同于HMM，但相差不远）以及《Predicting Future Locations with Hidden Markov Models》。欢迎任何建议或方向上的指点。

HMM进行位置预测

0 个答案: