从网页的javascript代码中提取数据

时间:2017-08-22 17:15:45

标签: json python-3.x sqlite parsing beautifulsoup

我正在尝试从结构如下所示的页面代码中提取数据:我想取出每个序列(10 V、20 V……)的 x 和 y 值,并把它们分开保存。解析得到数据列表之后,我最终打算为每个序列生成一张图。

soup(BeautifulSoup 解析得到的页面内容):

<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-72511212-1', 'auto');
  ga('send', 'pageview');

</script>
<!DOCTYPE html>

<html>
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<title>Metabolite Chart</title>
<!-- 1. Add these JavaScript inclusions in the head of your page -->
<script src="lib/js/jquery-1.6.x/jquery-1.6.1.min.js" type="text/javascript"></script>
<script src="lib/js/misc/highcharts.src.delta.js" type="text/javascript"></script>
<!--<script type="text/javascript" src="/lib/highcharts.js"></script>-->
<script src="lib/js/misc/excanvas.compiled.js" type="text/javascript"></script>
<!--
    <script src="https://code.highcharts.com/highcharts.js"></script>
    <script src="https://code.highcharts.com/modules/exporting.js"></script>
-->
<!-- 2. Add the JavaScript to initialize the chart on document ready -->
<script type="text/javascript">

        //alert("molI: " + "203");
        //alert("molN: " + "Chenodeoxycholic acid glycine conjugate");
        $(document).ready(function() {

            //alert("molN: " + "Chenodeoxycholic acid glycine conjugate");
            //  function resetchart() {
            //  fireEvent(chart, 'selection', { resetSelection: true }, zoom);
            //  }
            var count = 0;
            //alert("molI: " + "203");
            //var mid = 203;                // Pass MID here!
            var mid = "203";                // Pass 
            var mole = "Chenodeoxycholic acid glycine conjugate";           // Pass molecule name here!


            var chart = new Highcharts.Chart({
                chart: {
                    renderTo: 'container',
                    defaultSeriesType: 'column',
                    zoomType: 'xy',
                    margin: [50, 50, 200, 80]
                },
                title: {
                    text: '' + mole
                },
                subtitle: {
                    text: "MID: 203&nbsp;&nbsp;&nbsp;&nbsp;<font color='blue'><b>Insilico predicted spectra<\/b><\/font>"                },
                credits: {
                    enabled: false
                },
                xAxis: {
                    min: 0,
                    //  max: 200,
                    title: {
                        enabled: true,
                        text: 'Mass (m/z)'
                    },
                    maxZoom: 0.1,
                    tickPixelInterval: 100
                },
                yAxis: {
                    min: 0,
                    max: 100,
                    title: {
                        text: 'Intensity (%)'
                    }
                },
                legend: {
                    enabled: true,
                    showFragments: true,
                    showNeutrals: false,
                    showPeaks: false,
                    exclusiveSelect: true,    // Turns on exclusive radio style buttons
                    dblClick: false,
                    startNumber: 0, // The default legend item when page loads
                    borderWidth: 1,
                    layout: 'vertical',
                    backgroundColor: '#FFFFFF',
                    style: {
                        left: '50px',
                        top: '300px',
                        bottom: 'auto'
                    }
                },
                // Tooltip HTML
                tooltip: {
                    second: true,
                    neutral: false,
                    borderRadius: 0, 
                    formatter: function() {
                        var namestr;
                        if (this.series.name.match(/\+/g) && !this.series.name.match("Cl"))
                            namestr = "Mode: <b><font size=\"4\">(+)</font></b> &nbsp;&nbsp;&nbsp;&nbsp; Collision Energy: ";
                        else if (this.series.name.match('-'))
                            namestr = "Mode: <b><font size=\"4\">(-)</font></b> &nbsp;&nbsp;&nbsp;&nbsp; Collision Energy: ";
                        if (!(this.series.name.match("10 V")||this.series.name.match("20 V")||this.series.name.match("40 V")))
                            namestr += "<b><font size=\"3\">0 V</font></b>";
                        else if (this.series.name.match("10 V"))
                            namestr += "<b><font size=\"3\">10 V</font></b>";
                        else if (this.series.name.match("20 V"))
                            namestr += "<b><font size=\"3\">20 V</font></b>";
                        else if (this.series.name.match("40 V"))
                            namestr += "<b><font size=\"3\">40 V</font></b>";
                       return '<center><br/> &nbsp;&nbsp;&nbsp;&nbsp; '+ namestr +'<br/>' + '&nbsp;&nbsp;&nbsp; m/z: <b><font size="3">' + this.x.toFixed(4) + '</font></b> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Intensity: <b><font size="3">' + parseInt(Math.abs(this.y)) + ' % &nbsp;&nbsp;</font></b></center><br/>';
                    },
                    formatter2: function() {
                        var namestr;
                        if (this.series.name.match(/\+/g))
                            namestr = "Mode: (+), &nbsp;&nbsp; Collision Energy: ";
                        else if (this.series.name.match('-'))
                            namestr = "Mode: (-), &nbsp;&nbsp; Collision Energy: ";

                        if (!(this.series.name.match("10 V")||this.series.name.match("20 V")||this.series.name.match("40 V")))
                            namestr += "0 V, &nbsp;&nbsp; Adduct: ";
                        else if (this.series.name.match("10 V"))
                            namestr += "10 V, &nbsp;&nbsp; Adduct: ";
                        else if(this.series.name.match("20 V"))
                            namestr += "20 V, &nbsp;&nbsp; Adduct: ";
                        else if (this.series.name.match("40 V"))
                            namestr += "40 V, &nbsp;&nbsp; Adduct: ";

                                                return false;
                    }
                    },
                    plotOptions: {
                        column: {
                            pointPadding: 0.53,
                            // pointPadding: 0.99,
                            borderWidth: 0,
                            shadow: false
                            // borderColor: '#000000'
                        }
                    },
                    series: [{name: '&nbsp;&nbsp;&nbsp;&nbsp; (+) &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 10 V &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; [M+H]+ &nbsp;&nbsp;',data:[{x:450.321,y:84,fragment: false},{x:434.29,y:0,fragment: false},{x:432.311,y:8,fragment: false},{x:432.311,y:100,fragment: false},{x:416.28,y:0,fragment: false},{x:414.3,y:24,fragment: false},{x:406.332,y:0,fragment: false},{x:404.316,y:16,fragment: false},{x:390.3,y:0,fragment: false},{x:390.264,y:0,fragment: false},{x:388.321,y:0,fragment: false},{x:386.305,y:8,fragment: false},{x:375.289,y:24,fragment: false},{x:372.29,y:0,fragment: false},{x:357.279,y:12,fragment: false},{x:347.294,y:4,fragment: false},{x:331.3,y:0,fragment: false},{x:329.284,y:4,fragment: false},{x:319.263,y:0,fragment: false},{x:301.253,y:0,fragment: false},{x:291.232,y:0,fragment: false},{x:273.221,y:0,fragment: false},{x:240.159,y:0,fragment: false},{x:158.081,y:0,fragment: false},{x:130.05,y:0,fragment: false},{x:76.0393,y:44,fragment: false},{x:74.0237,y:0,fragment: false},{x:59.0128,y:8,fragment: false},{x:58.0287,y:0,fragment: false},{x:30.0338,y:0,fragment: false},{x:28.0182,y:0,fragment: false} ]},{name: '&nbsp;&nbsp;&nbsp;&nbsp; (+) &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 20 V &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; [M+H]+ &nbsp;&nbsp;',data:[{x:450.321,y:11.764705882353,fragment: false},{x:434.29,y:0,fragment: false},{x:432.311,y:35.294117647059,fragment: false},{x:432.311,y:5.8823529411765,fragment: false},{x:416.28,y:5.8823529411765,fragment: false},{x:414.3,y:58.823529411765,fragment: false},{x:404.316,y:11.764705882353,fragment: false},{x:388.321,y:5.8823529411765,fragment: false},{x:386.305,y:35.294117647059,fragment: false},{x:375.289,y:29.411764705882,fragment: false},{x:372.29,y:5.8823529411765,fragment: false},{x:357.279,y:35.294117647059,fragment: false},{x:347.294,y:17.647058823529,fragment: false},{x:333.279,y:5.8823529411765,fragment: false},{x:331.3,y:5.8823529411765,fragment: 
false},{x:329.284,y:17.647058823529,fragment: false},{x:327.268,y:5.8823529411765,fragment: false},{x:319.263,y:0,fragment: false},{x:315.268,y:5.8823529411765,fragment: false},{x:301.253,y:5.8823529411765,fragment: false},{x:273.221,y:5.8823529411765,fragment: false},{x:158.081,y:5.8823529411765,fragment: false},{x:130.05,y:5.8823529411765,fragment: false},{x:111.08,y:5.8823529411765,fragment: false},{x:102.019,y:0,fragment: false},{x:76.0393,y:100,fragment: false},{x:74.0237,y:5.8823529411765,fragment: false},{x:59.0128,y:23.529411764706,fragment: false},{x:58.0287,y:17.647058823529,fragment: false},{x:30.0338,y:11.764705882353,fragment: false},{x:28.0182,y:11.764705882353,fragment: false} ]},{name: '&nbsp;&nbsp;&nbsp;&nbsp; (+) &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 40 V &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; [M+H]+ &nbsp;&nbsp;',data:[{x:416.28,y:18.181818181818,fragment: false},{x:414.3,y:100,fragment: false},{x:388.321,y:18.181818181818,fragment: false},{x:386.305,y:54.545454545455,fragment: false},{x:372.29,y:9.0909090909091,fragment: false},{x:359.294,y:9.0909090909091,fragment: false},{x:357.279,y:63.636363636364,fragment: false},{x:355.263,y:9.0909090909091,fragment: false},{x:333.279,y:0,fragment: false},{x:331.3,y:18.181818181818,fragment: false},{x:331.263,y:9.0909090909091,fragment: false},{x:329.284,y:36.363636363636,fragment: false},{x:327.268,y:9.0909090909091,fragment: false},{x:317.284,y:9.0909090909091,fragment: false},{x:315.268,y:18.181818181818,fragment: false},{x:303.268,y:9.0909090909091,fragment: false},{x:301.253,y:18.181818181818,fragment: false},{x:275.237,y:9.0909090909091,fragment: false},{x:273.221,y:18.181818181818,fragment: false},{x:261.221,y:0,fragment: false},{x:111.08,y:9.0909090909091,fragment: false},{x:97.0648,y:9.0909090909091,fragment: false},{x:76.0393,y:45.454545454545,fragment: false},{x:59.0128,y:63.636363636364,fragment: false},{x:58.0287,y:81.818181818182,fragment: 
false},{x:55.0542,y:9.0909090909091,fragment: false},{x:44.9971,y:9.0909090909091,fragment: false},{x:41.0022,y:18.181818181818,fragment: false},{x:32.0495,y:9.0909090909091,fragment: false},{x:30.0338,y:36.363636363636,fragment: false},{x:28.0182,y:54.545454545455,fragment: false} ]}]
                }); 

            });
        </script>
</head>
<body style="border:0;overflow:visible">
<!-- 3. Add the container -->
<div id="container" style="width: 720px; height: 460px; margin: 0 auto">
</div>
<!-- <table align = "center" style="border-width:0; cellpadding:5; table-layout:fixed; bordercolor:'#00FF00'"> -->
<table align="center" style="border-width:0; cellpadding:5; table-layout:fixed; bordercolor:'#00FF00'">
<tr>
<td style="border-style: solid; border-color:#FFF8C6"><img align="top" alt="attention" src="img/attn.png" title="how to use spectrum"/>  
                            <font color="red" face="helvetica,arial" size="2">
<b>Please mouse over the spectrum to view the detail information of each peak<br/>
                                        Use left mouse button to zoom in (click and drag) and zoom out (double-click)</b></font>
</td>
</tr>
</table>
<script type="text/javascript">

                    var _gaq = _gaq || [];
                    _gaq.push(['_setAccount', 'UA-1907670-5']);
                    _gaq.push(['_trackPageview']);

                    (function() {
                        var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
                        ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
                        var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
                    })();

                </script>
</body>
</html>

我想要提取的数据:

series: [{name: '&nbsp;&nbsp;&nbsp;&nbsp; (+) &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 10 V &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; [M+H]+ &nbsp;&nbsp;',data:[{x:450.321,y:84,fragment: false},{x:434.29,y:0,fragment: false},{x:432.311,y:8,fragment: false},{x:432.311,y:100,fragment: false},{x:416.28,y:0,fragment: false},{x:414.3,y:24,fragment: false},{x:406.332,y:0,fragment: false},{x:404.316,y:16,fragment: false},{x:390.3,y:0,fragment: false},{x:390.264,y:0,fragment: false},{x:388.321,y:0,fragment: false},{x:386.305,y:8,fragment: false},{x:375.289,y:24,fragment: false},{x:372.29,y:0,fragment: false},{x:357.279,y:12,fragment: false},{x:347.294,y:4,fragment: false},{x:331.3,y:0,fragment: false},{x:329.284,y:4,fragment: false},{x:319.263,y:0,fragment: false},{x:301.253,y:0,fragment: false},{x:291.232,y:0,fragment: false},{x:273.221,y:0,fragment: false},{x:240.159,y:0,fragment: false},{x:158.081,y:0,fragment: false},{x:130.05,y:0,fragment: false},{x:76.0393,y:44,fragment: false},{x:74.0237,y:0,fragment: false},{x:59.0128,y:8,fragment: false},{x:58.0287,y:0,fragment: false},{x:30.0338,y:0,fragment: false},{x:28.0182,y:0,fragment: false} ]},{name: '&nbsp;&nbsp;&nbsp;&nbsp; (+) &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 20 V &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; [M+H]+ &nbsp;&nbsp;',data:[{x:450.321,y:11.764705882353,fragment: false},{x:434.29,y:0,fragment: false},{x:432.311,y:35.294117647059,fragment: false},{x:432.311,y:5.8823529411765,fragment: false},{x:416.28,y:5.8823529411765,fragment: false},{x:414.3,y:58.823529411765,fragment: false},{x:404.316,y:11.764705882353,fragment: false},{x:388.321,y:5.8823529411765,fragment: false},{x:386.305,y:35.294117647059,fragment: false},{x:375.289,y:29.411764705882,fragment: false},{x:372.29,y:5.8823529411765,fragment: false},{x:357.279,y:35.294117647059,fragment: false},{x:347.294,y:17.647058823529,fragment: false},{x:333.279,y:5.8823529411765,fragment: false},{x:331.3,y:5.8823529411765,fragment: 
false},{x:329.284,y:17.647058823529,fragment: false},{x:327.268,y:5.8823529411765,fragment: false},{x:319.263,y:0,fragment: false},{x:315.268,y:5.8823529411765,fragment: false},{x:301.253,y:5.8823529411765,fragment: false},{x:273.221,y:5.8823529411765,fragment: false},{x:158.081,y:5.8823529411765,fragment: false},{x:130.05,y:5.8823529411765,fragment: false},{x:111.08,y:5.8823529411765,fragment: false},{x:102.019,y:0,fragment: false},{x:76.0393,y:100,fragment: false},{x:74.0237,y:5.8823529411765,fragment: false},{x:59.0128,y:23.529411764706,fragment: false},{x:58.0287,y:17.647058823529,fragment: false},{x:30.0338,y:11.764705882353,fragment: false},{x:28.0182,y:11.764705882353,fragment: false} ]},{name: '&nbsp;&nbsp;&nbsp;&nbsp; (+) &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 40 V &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; [M+H]+ &nbsp;&nbsp;',data:[{x:416.28,y:18.181818181818,fragment: false},{x:414.3,y:100,fragment: false},{x:388.321,y:18.181818181818,fragment: false},{x:386.305,y:54.545454545455,fragment: false},{x:372.29,y:9.0909090909091,fragment: false},{x:359.294,y:9.0909090909091,fragment: false},{x:357.279,y:63.636363636364,fragment: false},{x:355.263,y:9.0909090909091,fragment: false},{x:333.279,y:0,fragment: false},{x:331.3,y:18.181818181818,fragment: false},{x:331.263,y:9.0909090909091,fragment: false},{x:329.284,y:36.363636363636,fragment: false},{x:327.268,y:9.0909090909091,fragment: false},{x:317.284,y:9.0909090909091,fragment: false},{x:315.268,y:18.181818181818,fragment: false},{x:303.268,y:9.0909090909091,fragment: false},{x:301.253,y:18.181818181818,fragment: false},{x:275.237,y:9.0909090909091,fragment: false},{x:273.221,y:18.181818181818,fragment: false},{x:261.221,y:0,fragment: false},{x:111.08,y:9.0909090909091,fragment: false},{x:97.0648,y:9.0909090909091,fragment: false},{x:76.0393,y:45.454545454545,fragment: false},{x:59.0128,y:63.636363636364,fragment: false},{x:58.0287,y:81.818181818182,fragment: 
false},{x:55.0542,y:9.0909090909091,fragment: false},{x:44.9971,y:9.0909090909091,fragment: false},{x:41.0022,y:18.181818181818,fragment: false},{x:32.0495,y:9.0909090909091,fragment: false},{x:30.0338,y:36.363636363636,fragment: false},{x:28.0182,y:54.545454545455,fragment: false} ]}]

我无法定位到这段数据,也无法以可用的方式把它提取出来。我注意到它的结构类似 JSON,但我无法从 soup 中把它取出来加以利用。

如果我不清楚我想要做什么,请告诉我。

下面是我的python脚本:

   from bs4 import BeautifulSoup
import urllib
import urllib.request
import xlwt
import xlrd
import requests
import re
import json

CASNUMBERS =xlrd.open_workbook("./OUTPUTFILE.xls")
CASNUMBERS_sheet = CASNUMBERS.sheet_by_index(0)

# import sqlite3
# conn = sqlite3.connect('CurationParsedData.db')
# c = conn.cursor()
#
# ##Create Table
# c.execute('''CREATE TABLE CurationParsedData(Exp_website TEXT,InSilico_website TEXT)''')
#
#
# def add_website(exp,insil):
#     c.execute("INSERT INTO CurationParsedData VALUES("+ exp ","+ insil")")



# MID = "154"
# NAMEID = "glucose"
# CASID = "492-62-6"
# KEGGID = "C00267"


def make_soup(url):
    """Open ``url`` and return its content parsed with BeautifulSoup's ``html.parser``."""
    return BeautifulSoup(urllib.request.urlopen(url), "html.parser")

def ESILINK(number):
    """Return the METLIN ``showChart.php`` URL with ``etype=experimental``.

    ``number`` must be a string; it is inserted verbatim as the ``molid`` value.
    """
    prefix = "https://metlin.scripps.edu/showChart.php?molid="
    suffix = "&h=240&collE=&Imode=p&etype=experimental"
    return "".join((prefix, number, suffix))


def insilico(number):
    """Return the METLIN ``showChart.php`` URL with ``etype=insilico``.

    ``number`` must be a string; it is inserted verbatim as the ``molid`` value.
    """
    prefix = "https://metlin.scripps.edu/showChart.php?molid="
    suffix = "&h=240&collE=&Imode=p&etype=insilico"
    return "".join((prefix, number, suffix))

def metlinsearch(NAME="", CAS="", KEGG=""):
    """Return the METLIN advanced-search URL for the given name, CAS and KEGG terms.

    Each argument must be a string and is inserted verbatim into the query
    string; the defaults leave the corresponding field empty.
    """
    pieces = (
        "https://metlin.scripps.edu/advanced_search_result.php?molid=&mass_min=&mass_max=&name=",
        NAME,
        "&formula=&cas=",
        CAS,
        "&kegg=",
        KEGG,
        "&smilefile=&msmspeaks_min=&AminoAcid=add&drug=add&toxinEPA=add&smilesExactMatchCheckBox=false&nameExactMatchCheckBox=false",
    )
    return "".join(pieces)

def HMDBsearch(number):
    """Return the HMDB metabolite-search URL whose ``query`` value is ``number``.

    ``number`` must be a string; it is inserted verbatim into the query string.
    """
    prefix = "http://www.hmdb.ca/unearth/q?utf8=%E2%9C%93&query="
    suffix = "&searcher=metabolites&button="
    return "".join((prefix, number, suffix))


# Main driver: for every value in column index 2 of the workbook sheet
# (starting at row index 1 -- presumably skipping a header row), build the
# HMDB and METLIN search URLs, look up the METLIN ids, download the in-silico
# chart page and try to decode its Highcharts ``series`` data as JSON.
for items in CASNUMBERS_sheet.col_values(2, 1):
    print(items)

    hmdbsearch_link = HMDBsearch(items)

    print(hmdbsearch_link)

    metlinsearch_link = metlinsearch(CAS = items)
    metlinesearch_soup = make_soup(metlinsearch_link)

    # The text of each <th scope="row"> cell is used as a molecule id below
    # (presumably the MID column of the result table -- confirm on the page).
    firstMID = metlinesearch_soup.find("th", {"scope": "row"})
    allMID = metlinesearch_soup.findAll("th", {"scope": "row"})
    # Placeholders that remain in place when the search returned no id.
    ESI = "NO LINK"
    INSILICO = "NO LINK"
    if firstMID != None:
        firstMID = firstMID.text
        ESI = ESILINK(firstMID)
        INSILICO = insilico(firstMID)

    # NOTE(review): findAll returns a (possibly empty) list rather than None,
    # so this condition looks always true.
    if allMID != None:
        MIDlist = []
        # NOTE: this inner loop rebinds the outer loop variable ``items``.
        for items in allMID:
            MIDlist.append(items.text)
    # NOTE(review): these two calls pass an already-built URL (or "NO LINK")
    # where an id is expected; ``esi`` and ``sil`` are not used in the code
    # shown below.
    esi = ESILINK(ESI)
    sil = insilico(INSILICO)
    print(ESI)
    print(INSILICO)
    # sil_soup = make_soup(sil)
    # NOTE: the request is issued even when INSILICO is still "NO LINK".
    sil_link = requests.get(INSILICO)
    sil_soup = BeautifulSoup(sil_link.text, "lxml")
    # print(sil_soup)
    series = sil_soup.findAll('script', {"type": "text/javascript"})
    # Take the fourth <script type="text/javascript"> element; raises
    # IndexError when the page contains fewer than four of them.
    series = series[3]
    info = []
    # Iterating a tag yields its children; each child is stored as a string.
    for x in series:
        info.append(str(x))

    for text in info:
        # Keep only what follows the first 'series:' marker, then strip the
        # '&nbsp;' entities and every space character.
        head, body, tail = text.partition('series:')
        tail = tail.replace('&nbsp', '').replace(';', '').replace(' ', '')
        print(tail)
        json_string = tail
        # NOTE: the remaining text is a JavaScript literal, not JSON: its keys
        # (name, data, x, y, fragment) are unquoted, strings use single
        # quotes, and the chart's closing code still follows the array, so
        # json.loads() cannot parse it.
        parse_json= json.loads(json_string)
        # NOTE(review): even if parsing succeeded, ``series`` is a list of
        # objects, so indexing the result with 'data' would not work directly.
        print(parse_json['data'])

1 个答案:

答案 0 :(得分:0)

我通过几个步骤得到了想要的输出。这肯定不是最好、最优的方式,但我的做法是:先把内容转换为字符串 → 用 partition 截取到我需要的部分 → 删除字符串中不需要的字符 → 通过 for 循环把数据重新整理成列表 → 遍历列表并拆分出 x 和 y 值 → 转换为浮点数 → 得到的值即可用于绘图。

这种完成任务的方式非常迂回,但或许能帮助在解析 JavaScript 数据时遇到类似问题的人。

下面是代码:

import json
import re
import urllib
import urllib.parse
import urllib.request

import matplotlib.pyplot as plt
import requests
import xlrd
import xlwt
from bs4 import BeautifulSoup


CASNUMBERS =xlrd.open_workbook("./OUTPUTFILE.xls")
CASNUMBERS_sheet = CASNUMBERS.sheet_by_index(0)

# import sqlite3
# conn = sqlite3.connect('CurationParsedData.db')
# c = conn.cursor()
#
# ##Create Table
# c.execute('''CREATE TABLE CurationParsedData(Exp_website TEXT,InSilico_website TEXT)''')
#
#
# def add_website(exp,insil):
#     c.execute("INSERT INTO CurationParsedData VALUES("+ exp ","+ insil")")

def make_soup(url):
    """Download ``url`` and parse the response with BeautifulSoup.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
    """
    # The response is a context manager; closing it releases the underlying
    # connection, which was previously left open on every call.
    with urllib.request.urlopen(url) as thepage:
        return BeautifulSoup(thepage, "html.parser")

def ESILINK(number):
    """Build the METLIN chart URL for the experimental spectra of a molecule.

    Args:
        number: Value for the ``molid`` query parameter. Strings and integers
            are accepted; surrounding whitespace is ignored and the value is
            URL-encoded.

    Returns:
        The ``showChart.php`` URL with ``etype=experimental``.
    """
    query = urllib.parse.urlencode([
        ("molid", str(number).strip()),
        ("h", "240"),
        ("collE", ""),
        ("Imode", "p"),
        ("etype", "experimental"),
    ])
    return "https://metlin.scripps.edu/showChart.php?" + query


def insilico(number):
    """Build the METLIN chart URL for the in-silico spectra of a molecule.

    Args:
        number: Value for the ``molid`` query parameter. Strings and integers
            are accepted; surrounding whitespace is ignored and the value is
            URL-encoded.

    Returns:
        The ``showChart.php`` URL with ``etype=insilico``.
    """
    query = urllib.parse.urlencode([
        ("molid", str(number).strip()),
        ("h", "240"),
        ("collE", ""),
        ("Imode", "p"),
        ("etype", "insilico"),
    ])
    return "https://metlin.scripps.edu/showChart.php?" + query

def metlinsearch(NAME="", CAS="", KEGG=""):
    """Build the METLIN advanced-search URL for a name, CAS number and/or KEGG id.

    Args:
        NAME: Value for the ``name`` field (empty string leaves it blank).
        CAS: Value for the ``cas`` field (empty string leaves it blank).
        KEGG: Value for the ``kegg`` field (empty string leaves it blank).

    Returns:
        The ``advanced_search_result.php`` URL. The three values are converted
        to ``str``, stripped of surrounding whitespace and URL-encoded, so
        terms containing spaces or ``&`` no longer corrupt the query string.
    """
    query = urllib.parse.urlencode([
        ("molid", ""),
        ("mass_min", ""),
        ("mass_max", ""),
        ("name", str(NAME).strip()),
        ("formula", ""),
        ("cas", str(CAS).strip()),
        ("kegg", str(KEGG).strip()),
        ("smilefile", ""),
        ("msmspeaks_min", ""),
        ("AminoAcid", "add"),
        ("drug", "add"),
        ("toxinEPA", "add"),
        ("smilesExactMatchCheckBox", "false"),
        ("nameExactMatchCheckBox", "false"),
    ])
    return "https://metlin.scripps.edu/advanced_search_result.php?" + query

def HMDBsearch(number):
    """Build the HMDB metabolite-search URL for a query term.

    Args:
        number: Search term placed in the ``query`` parameter. It is converted
            to ``str``, stripped of surrounding whitespace and URL-encoded.

    Returns:
        The ``unearth/q`` URL restricted to the ``metabolites`` searcher.
    """
    query = urllib.parse.urlencode([
        ("utf8", "\u2713"),  # encodes to %E2%9C%93, as in the original URL
        ("query", str(number).strip()),
        ("searcher", "metabolites"),
        ("button", ""),
    ])
    return "http://www.hmdb.ca/unearth/q?" + query


def _series_tokens(script_text):
    """Flatten the ``series:`` section of a chart script into a list of tokens.

    Everything after the first ``series:`` marker is stripped of whitespace,
    ``&nbsp;`` entities, ``fragment:`` flags, square brackets and the
    ``name:`` / ``data:`` keys, then cut at every ``{`` or ``}``. Commas and
    parentheses are dropped from each piece and empty pieces are discarded.

    Args:
        script_text: Source text of the <script> element holding the chart.

    Returns:
        A list of strings. Data points look like ``"x:450.321y:84"``; series
        labels look like ``"'+10VM+H+'"`` (quotes kept, "(+)" reduced to "+").
        The list is empty when ``script_text`` has no ``series:`` marker.
    """
    _head, _marker, tail = script_text.partition('series:')
    # Remove all whitespace first so that a flag wrapped over two lines
    # ("fragment: \nfalse") is still recognised and removed below; left in
    # place it would end up inside the y value and break float().
    tail = ''.join(tail.split())
    for noise in ('&nbsp', ';', 'fragment:false', 'fragment:true',
                  '[', ']', 'name:', 'data:'):
        tail = tail.replace(noise, '')
    # Every brace ends a piece. The text after the last brace is the
    # JavaScript that follows the series literal, so it is dropped.
    pieces = re.split(r'[{}]', tail)[:-1]
    tokens = []
    for piece in pieces:
        piece = piece.replace(',', '').replace(')', '').replace('(', '')
        if piece:
            tokens.append(piece)
    return tokens


def _split_point(token):
    """Convert an ``"x:<float>y:<float>"`` token into ``(x, y)`` rounded to 3 places.

    Raises:
        ValueError: If either part is not a valid float.
    """
    _prefix, _marker, rest = token.partition("x:")
    x_text, _marker, y_text = rest.partition("y:")
    return round(float(x_text), 3), round(float(y_text), 3)


# Main driver: for every value in column index 2 of the workbook sheet
# (starting at row index 1 -- presumably skipping a header row), print the
# search links, then download the experimental chart page of the first METLIN
# hit and print its labels and (x, y) points.
for cas_number in CASNUMBERS_sheet.col_values(2, 1):
    print(cas_number)

    hmdbsearch_link = HMDBsearch(cas_number)
    print(hmdbsearch_link)

    metlinsearch_link = metlinsearch(CAS=cas_number)
    metlinesearch_soup = make_soup(metlinsearch_link)

    # The text of each <th scope="row"> cell is used as a molecule id
    # (presumably the MID column of the result table -- confirm on the page).
    allMID = metlinesearch_soup.findAll("th", {"scope": "row"})
    MIDlist = [cell.text for cell in allMID]
    firstMID = MIDlist[0] if MIDlist else None

    # Placeholders that remain in place when the search returned no id.
    ESI = "NO LINK"
    INSILICO = "NO LINK"
    if firstMID is not None:
        ESI = ESILINK(firstMID)
        INSILICO = insilico(firstMID)
    print(ESI)
    print(INSILICO)
    if ESI == "NO LINK":
        continue

    chart_page = requests.get(ESI)
    sil_soup = BeautifulSoup(chart_page.text, "lxml")
    # Pick the script that actually contains the chart data instead of
    # relying on it being the fourth <script type="text/javascript"> element.
    script_texts = (
        "".join(str(child) for child in tag.contents)
        for tag in sil_soup.findAll('script', {"type": "text/javascript"})
    )
    chart_script = next(
        (text for text in script_texts if 'series:' in text), None)
    if chart_script is None:
        print("no chart data found in", ESI)
        continue

    finallist = _series_tokens(chart_script)
    print(finallist)
    for token in finallist:
        if token.startswith("x:"):
            xvalue, yvalue = _split_point(token)
            print("x:", xvalue, "y:", yvalue)
        else:
            # Anything that is not a data point is a series label
            # (mode, collision energy, adduct).
            print("energy", token)