错误:没有这样的模块:fts4;与anaconda jupyter笔记本一起使用-Fuzzymatcher软件包

时间:2020-10-01 12:48:53

标签: python jupyter-notebook

我正在使用jupyter笔记本并尝试运行以下代码:


import pubchempy as pcp
from rdkit import Chem
import numpy as np
import pandas as pd
import json
import time
import fuzzymatcher

start_time = time.time()
#     CD = pd.read_excel(excelpathin)

    data = pd.ExcelFile(r"C:\Users\USER\Downloads\MOD_REINJ_NEG_ChemSpider Results.xlsx")
    data = pd.ExcelFile(excelpathin)

    df = data.parse (sheet_name=0)
    inKey = list()
    for idx,sd in enumerate(df['Structure']):
        print(idx)
        F = open ("temp.sdf","w")
        F.writelines (sd)
        F.close()
        suppl = Chem.SDMolSupplier ('temp.sdf')
        mol = next (suppl)
        if mol==None:
            inKey.append (np.nan)
        else:
            inKey.append (Chem.MolToInchiKey (mol))

    inKey = pd.DataFrame(inKey)
    inKey.columns = ['InChIKey']
    CD = pd.concat([df, inKey], axis=1, sort=False)
    
    
    print("--- %s seconds --f-add 3 cols" % (time.time() - start_time))

    # From here is the joindata function with modification
    # Load the parse HMDB file
    with open(jsonpathin, 'r') as read_file:
        data = json.load(read_file)

    start_time = time.time()
    # Load the parse HMDB file
    # with open('D:/BCDD/Documents/Tal/Projects/HMDB/DataSets/Parser_HMDB.py Output/serum_metabolites.json', 'r') as read_file:
    #     data = json.load(read_file)


    # Create a data frame from the list of dictionaries
    # df_hmdb = pd.DataFrame(data,  columns=['accession', 'name', 'chemical_formula', 'inchikey', 'disease_name' ])
    df_hmdb = pd.DataFrame(data)
    df_hmdb.drop(['description', 'synonyms', 'kegg_id', 'meta_cyc_id', 'pathway_name'], axis=1)

    df_excel = CD
    # Merge by inchikey
    joindata_by_inchikey = pd.merge(left=df_excel, right=df_hmdb, how='inner', left_on='InChIKey', right_on='inchikey')

    print("--- %s seconds --f-merge by inchikey " % (time.time() - start_time))

    start_time = time.time()
    # Reduce the rows to those we DID find a match by inchkey in bothe data sets
    df_hmdb_reduce_byinchik = df_hmdb.loc[~df_hmdb['inchikey'].isin(df_excel['InChIKey'])]
    df_excel_reduce_byinchik = df_excel.loc[~df_excel['InChIKey'].isin(joindata_by_inchikey['InChIKey'])]


    # joindata_by_name = fuzzymatcher.fuzzy_left_join(df_excel, df_hmdb, left_on="Name", right_on="name")
    joindata_by_name = fuzzymatcher.fuzzy_left_join(df_excel_reduce_byinchik, df_hmdb_reduce_byinchik, left_on="Name",
                                                    right_on="name")

    # Selecting threshold  best_match_score>0.25 maybe adjustments needed
    joindata_by_name = joindata_by_name[joindata_by_name['best_match_score'] > 0.55]
    # Drop columns the
    joindata_by_name.drop(['best_match_score', '__id_left', '__id_right'], axis=1, inplace=True)
    print("--- %s seconds --f-merge by name" % (time.time() - start_time))

    start_time = time.time()
    # Reduce the rows to those we DID find a match by inchkey in and by name both data sets
    df_hmdb_reduce_byname = df_hmdb_reduce_byinchik.loc[~df_hmdb_reduce_byinchik['name'].isin(joindata_by_name['name'])]
    df_excel_reduce_byname = df_excel_reduce_byinchik.loc[
        ~df_excel_reduce_byinchik['Name'].isin(joindata_by_name['Name'])]
    # Remove spaces between letters on  'Formula' ( there is  a warning)
    df_excel_reduce_byname.loc[:, 'Formula'] = df_excel_reduce_byname['Formula'].str.replace(' ', '')

    # Merge by chemical_formula
    joindata_by_CF = pd.merge(left=df_excel_reduce_byname, right=df_hmdb_reduce_byname, how='inner', left_on='Formula',
                              right_on='chemical_formula')

    # This data inculed rows from the original EXCEL file that we did NOT find and match ( by inchikey nor name nor CF)
    df_excel_reduce_byCF = df_excel_reduce_byname.loc[
        ~df_excel_reduce_byname['Formula'].isin(joindata_by_CF['chemical_formula'])]

    # Create a list of all columns of the HMDB JSON data
    colnames = joindata_by_inchikey.columns[6:]
    # Add those names as empty columns to the df_excel_reduce_byCF. reducedata in all the rows from the original Excel
    # that did NOT find a match and added the columns of the HMDB
    reducedata = df_excel_reduce_byCF.reindex(columns=[*df_excel_reduce_byCF.columns.tolist(), *colnames])

    # Append all the data sets
    # out = joindata_by_inchikey.append(joindata_by_name.append(joindata_by_CF))
    out = joindata_by_inchikey.append(joindata_by_name.append(joindata_by_CF.append(reducedata)))

    print("--- %s seconds --f-merge by CF" % (time.time() - start_time))
    

(我不确定代码是否相关,因为我加载了不在问题中共享的文件)

这是我得到的错误:

OperationalError                          Traceback (most recent call last)
<ipython-input-8-26c4a723b687> in <module>
     23 # joindata_by_name = fuzzymatcher.fuzzy_left_join(df_excel, df_hmdb, left_on="Name", right_on="name")
     24 joindata_by_name = fuzzymatcher.fuzzy_left_join(df_excel_reduce_byinchik, df_hmdb_reduce_byinchik, left_on="Name",
---> 25                                                 right_on="name")
     26 
     27 # Selecting threshold  best_match_score>0.25 maybe adjustments needed

~\Anaconda3\lib\site-packages\fuzzymatcher\__init__.py in fuzzy_left_join(df_left, df_right, left_on, right_on, left_id_col, right_id_col)
     39     m = Matcher(dp, dg, s)
     40     m.add_data(df_left, df_right, left_on, right_on,  left_id_col, right_id_col)
---> 41     m.match_all()
     42 
     43     return m.get_left_join_table()

~\Anaconda3\lib\site-packages\fuzzymatcher\matcher.py in match_all(self)
     87         self.scorer.add_data(self)
     88 
---> 89         self.data_getter.add_data(self)
     90 
     91         # Get a table that contains only the matches, scores and ids

~\Anaconda3\lib\site-packages\fuzzymatcher\data_getter_sqlite.py in add_data(self, matcher)
     58                  USING fts4({} TEXT, _concat_all TEXT, _concat_all_alternatives TEXT);
     59               """.format(matcher.right_id_col)
---> 60         con.execute(sql)
     61         con.execute("INSERT INTO fts_target SELECT * FROM df_right_processed")
     62 

OperationalError: no such module: fts4

带有模糊匹配器错误的行用于在其他计算机上使用Pycharme进行工作,但在jupyter笔记本上则没有。

我检查了这个答案:

  1. perationalError: No Such Module:fts4
  2. OperationalError: no such module: fts4

但是没有用,我不知道该怎么办

任何帮助提示暗示的想法都很感激

0 个答案:

没有答案
相关问题