无法使用Python提取JSON响应

时间:2013-09-07 04:24:46

标签: python json web web-scraping

我有这个链接网址,我试图抓取JSON响应... 这是我正在尝试的网址,

http://www.massimodutti.com/webapp/wcs/stores/servlet/ItxSolrSearchingDataCmd?catalogId=30220002&langId=-1&storeId=34009456&searchTerm=blazer

当我打开页面源代码时,我在

下找到了JSON
    <script type="text/javascript">
        var searchProductJSON= {
    "doc": {
        "name": "categoryProducts",
        "version": "0.2"
    },
    "id": "",
    "items": [


            {
                "id": "2958060",
                "type": "ProductBean",
                "name": "GREY SUIT JACKET",
                "ref": "06017761-I2013",
                "isNew": "",
                "onSale": 0,
                "curPrice": "&pound;145<span class='decimal'>.00</span>",
                "oldPrice": "",
                "numPrice": "145.00000",
                "colors": 1 , 
                "numColors" : "1",
                "colorCutImages": ["/2013/I/0/1/p/6017/761/802/6017761802_3_1_5.jpg?timestamp=1374079181922"],
                "colorImages": [{

                                "cutImage": "http://static.massimodutti.net/3/photos/2013/I/0/1/p/6017/761/802/6017761802_3_1_6.jpg",
                                "imageColor": "http://static.massimodutti.net/3/photos/2013/I/0/1/p/6017/761/802/6017761802_1_1_3.jpg",
                                "linkColor": "http://www.massimodutti.com/webapp/wcs/stores/servlet/product/duttigb/en/30220002/0/2958060/GREY%2BSUIT%2BJACKET/802"

                                }],

                "attr": {
                    "feature": [],
                    "nameFeature": [],
                    "color": [],
                    "nameColor": [],
                    "size": [],
                    "nameSize": [],
                    "style": [],
                    "nameStyle": [],
                    "cats": []
                },

                "linkProduct": "http://www.massimodutti.com/webapp/wcs/stores/servlet/product/duttigb/en/30220002/0/2958060/GREY%2BSUIT%2BJACKET",
                "link": {
                    "JSON": "http://www.massimodutti.com/webapp/wcs/stores/servlet/ProductJSON?catalogId=30220002&langId=-1&productId=2958060&storeId=34009456",
                    "full": "http://www.massimodutti.com/webapp/wcs/stores/servlet/product/duttigb/en/30220002/0/2958060/GREY%2BSUIT%2BJACKET"
                },

                "image": {
                    "zoom": "http://static.massimodutti.net/3/photos/2013/I/0/1/p/6017/761/802/6017761802_1_1_2.jpg",
                    "aux": "http://static.massimodutti.net/3/photos/2013/I/0/1/p/6017/761/802/6017761802_2_1_3.jpg",
                    "standard": "http://static.massimodutti.net/3/photos/2013/I/0/1/p/6017/761/802/6017761802_1_1_3.jpg"
                },
                "labels": {
                    "isLookbookLabel": 0,
                    "urlNewLabel": "",
                    "urlLookbookLabel": "",
                    "urlLabels": [],
                    "isNewLabel": 0
                },
                "bundleProductSummaries":[]
            }


            ,


        {
                "id": "3137509",
                "type": "ProductBean",
                "name": "NY BELTED BLAZER",
                "ref": "06027843-I2013",
                "isNew": "",
                "onSale": 0,
                "curPrice": "&pound;195<span class='decimal'>.00</span>",
                "oldPrice": "",
                "numPrice": "195.00000",
                "colors": 1 , 
                "numColors" : "1",
                "colorCutImages": ["http://static.massimodutti.net/3/photos/2013/I/0/1/p/6027/843/807/6027843807_3_1_5.jpg?timestamp=1377593014200"],
                "colorImages": [{

                                "cutImage": "http://static.massimodutti.net/3/photos/2013/I/0/1/p/6027/843/807/6027843807_3_1_6.jpg",
                                "imageColor": "http://static.massimodutti.net/3/photos/2013/I/0/1/p/6027/843/807/6027843807_1_1_3.jpg",
                                "linkColor": "http://www.massimodutti.com/webapp/wcs/stores/servlet/product/duttigb/en/30220002/0/3137509/NY%2BBELTED%2BBLAZER/807"

                                }],

                "attr": {
                    "feature": [],
                    "nameFeature": [],
                    "color": [],
                    "nameColor": [],
                    "size": [],
                    "nameSize": [],
                    "style": [],
                    "nameStyle": [],
                    "cats": []
                },

                "linkProduct": "http://www.massimodutti.com/webapp/wcs/stores/servlet/product/duttigb/en/30220002/0/3137509/NY%2BBELTED%2BBLAZER",
                "link": {
                    "JSON": "http://www.massimodutti.com/webapp/wcs/stores/servlet/ProductJSON?catalogId=30220002&langId=-1&productId=3137509&storeId=34009456",
                    "full": "http://www.massimodutti.com/webapp/wcs/stores/servlet/product/duttigb/en/30220002/0/3137509/NY%2BBELTED%2BBLAZER"
                },

                "image": {
                    "zoom": "http://static.massimodutti.net/3/photos/2013/I/0/1/p/6027/843/807/6027843807_1_1_2.jpg",
                    "aux": "http://static.massimodutti.net/3/photos/2013/I/0/1/p/6027/843/807/6027843807_2_1_3.jpg",
                    "standard": "http://static.massimodutti.net/3/photos/2013/I/0/1/p/6027/843/807/6027843807_1_1_3.jpg"
                },
                "labels": {
                    "isLookbookLabel": 0,
                    "urlNewLabel": "",
                    "urlLookbookLabel": "",
                    "urlLabels": [],
                    "isNewLabel": 0
                },
                "bundleProductSummaries":[]
            }


    ],
    "urlImg": "http://static.massimodutti.net/3/static2",
    "imgPrefix": "http://static.massimodutti.net/3/photos",
    "urlPrefix": "",
    "modal": "new ItxPopUpCachedProductView({productUrl: '&productId=$id'});"

}       
    </script>

    <script type="text/javascript">

        nlInputBinds();
        slideWhowBinds();
        load_search_products(searchProductJSON);

        if(!isIPad()){
            jQuery(window).bind('resize', function() {
                positions_products();
                fitFooterBottom($('#grid'));
            });
        }
        else{
            $("#content").removeClass("center");
            $(window).bind('orientationchange', function(e, onready){
                fitFooterBottom($('#grid'));
            });
            if (!isIPad_5()){
                setTimeout(function(){
                    $('body').css({'background':'url("'+DUTTI_STATIC_CONTENT_PATH+'/img/fondo_parrilla.png") repeat-y scroll 0 0 #EDEAE6'});
                },1000);
            }
        }

        setGlobalProperties(3000,1000,500);
        setTimeout(function(){
            fitFooterBottom($('#grid'));
        },3000);    
    </script>
</body>

我如何仅抓取json部分,我能够使用以下代码获取整个标记,但是当使用charles web调试器来反转iphone应用程序发出的请求时,它会生成相同的url请求,并且响应是相同的JSON文件...

我使用了以下代码,

import urllib
from bs4 import BeautifulSoup

url = "http://www.massimodutti.com/webapp/wcs/stores/servlet/ItxSolrSearchingDataCmd?catalogId=30220002&langId=-1&storeId=34009456&searchTerm=leather"

soup = BeautifulSoup(urllib.urlopen(url).read())

data = soup.find_all("script")
if data:
    print 'yes'
    desired= data[62]
else:
    print 'no'

print desired

任何想法如何去做?

0 个答案:

没有答案