使用 CachedSqlEntityProcessor 时,完全导入(full-import)失败并抛出异常
我如何解决这个问题.......
不使用CachedSqlEntityProcessor,需要15小时才能编制索引
我的 products-data-config.xml 如下:
<dataConfig>
<!--
  DataImportHandler configuration for the "products" index.

  batchSize="-1": MySQL Connector/J buffers the complete result set in memory
  for any ordinary fetch size. DIH translates -1 into Integer.MIN_VALUE, which
  is the value the MySQL driver requires to stream rows one at a time. This
  keeps the large parent query from exhausting the heap during a full import.
-->
<dataSource type="JdbcDataSource" driver="com.mysql.jdbc.Driver" url="jdbc:mysql://localhost:3306/localbazaar" user="root" password="sa" batchSize="-1" />
<document name="products">
<!--
  Root entity: one Solr document per product_t row. Category, brand and
  primary image data are LEFT OUTER JOINed, so products without a matching
  row in those tables are still indexed (the joined columns are NULL).
-->
<entity name="domainProduct" query="SELECT p.PRODUCT_ID, p.NAME, LOWER(REPLACE(REPLACE(p.NAME,' ','-'),'/','-')) AS purl, p.description, p.BRAND_ID, p.CATEGORY_ID, p.GROUP_ID, p.MIN_PRICE, p.MAX_PRICE, p.AUTHOR, p.ISBN10, p.ISBN13, p.OLID, p.EAN13, p.UPCA, p.SKU, p.LANGUAGE, p.FORMAT, p.PUBLISHER, p.SUBJECT, c.NAME AS cname, c.URL_NAME, b.NAME AS bname, LOWER(REPLACE(REPLACE(b.NAME,' ','-'),'/','-')) AS bUrl, CONCAT('http://partnercenter.localbazaar.com/image?imageId=',i.IMAGE_NAME) AS productImage FROM product_t p LEFT OUTER JOIN category_t c ON (c.CATEGORY_ID=p.CATEGORY_ID) LEFT OUTER JOIN brand_t b ON (b.BRAND_ID=p.BRAND_ID) LEFT OUTER JOIN image_t i ON (i.ASSET_ID=p.PRODUCT_ID AND i.ASSET_TYPE_ID = 4 AND i.IMAGE_TYPE_ID = 0)">
<!-- Column-to-field mappings: `column` is the SQL result column (or alias), `name` is the Solr field. -->
<field column="PRODUCT_ID" name="productId" />
<field column="NAME" name="productName" />
<field column="purl" name="productUrlName" />
<field column="description" name="productDescription" />
<field column="BRAND_ID" name="brandId" />
<field column="CATEGORY_ID" name="categoryId" />
<field column="GROUP_ID" name="groupId" />
<field column="MIN_PRICE" name="minPrice" />
<field column="MAX_PRICE" name="maxPrice" />
<field column="AUTHOR" name="author" />
<field column="ISBN10" name="isbn10" />
<field column="ISBN13" name="isbn13" />
<field column="OLID" name="olid" />
<field column="EAN13" name="ean13" />
<field column="UPCA" name="upca" />
<field column="SKU" name="sku" />
<field column="LANGUAGE" name="language" />
<field column="FORMAT" name="format" />
<field column="PUBLISHER" name="publisher" />
<field column="SUBJECT" name="subject" />
<field column="cname" name="categoryName" />
<field column="URL_NAME" name="categoryUrlName" />
<field column="bname" name="brandName" />
<field column="bUrl" name="brandUrlName" />
<field column="productImage" name="productImage" />
<!--
  Child entity: the query is executed once and its rows are cached; for each
  parent row the cache is looked up by PRODUCT_ID using the parent's
  domainProduct.PRODUCT_ID (configured through the lowercase `where` attribute).
  NOTE(review): the whole product_properties_t result is held in memory by the
  cache, so the JVM heap must be sized for that table.
-->
<entity name="specifications" query="select PRODUCT_ID, CONCAT(PROPERTY_NAME,':::',property_value) as specifications FROM product_properties_t " processor="CachedSqlEntityProcessor" where="PRODUCT_ID=domainProduct.PRODUCT_ID" />
</entity>
</document>
</dataConfig>
我的 store-products-data-config.xml 如下:
<dataConfig>
<!--
  DataImportHandler configuration for the store-products index.

  batchSize="-1": MySQL Connector/J buffers the complete result set in memory
  for any ordinary fetch size. DIH translates -1 into Integer.MIN_VALUE, which
  is the value the MySQL driver requires to stream rows one at a time. This
  keeps the large parent query from exhausting the heap during a full import.
-->
<dataSource type="JdbcDataSource" driver="com.mysql.jdbc.Driver" url="jdbc:mysql://localhost:3306/localbazaar" user="root" password="sa" batchSize="-1" />
<document name="products">
<!--
  Root entity: one Solr document per store_products_t row, enriched through
  LEFT OUTER JOINs with store, address, product, category, brand, product
  image, store image and city data. Rows without a match in a joined table
  are still indexed (the joined columns are NULL).
-->
<entity name="domainStoreProduct" query="SELECT sp.STORE_PRODUCT_ID, sp.STORE_ID, sp.PRODUCT_ID, sp.MIN_PRICE, sp.MAX_PRICE, sp.STORE_TYPE_ID, sp.BUY_X, sp.GET_Y, s.NAME AS sname, LOWER(REPLACE(REPLACE(s.NAME,' ','-'),'/','-')) AS sUrl, s.DESCRIPTION AS sdesc, s.WEB_SITE_UTL, s.EMAIL, s.PHONE, s.MOBILE, s.ACTIVE AS act, a.ADDRESS_ID, a.location, LOWER(REPLACE(REPLACE(a.location,' ','-'),'/','-')) AS urlLoc, a.ADDRESS_LINE1, a.ADDRESS_LINE2, a.LATITUDE, a.LONGITUDE, a.zipcode, a.LANDMARK, a.CITY, CONCAT(a.LATITUDE,',',a.LONGITUDE) AS ll, p.NAME AS pname, LOWER(REPLACE(REPLACE(p.NAME,' ','-'),'/','-')) AS purl, p.description AS pdesc, p.BRAND_ID, p.CATEGORY_ID, p.GROUP_ID, p.AUTHOR, p.ISBN10, p.ISBN13, p.OLID, p.EAN13, p.UPCA, p.SKU, p.LANGUAGE, p.FORMAT, p.PUBLISHER, p.SUBJECT, c.NAME AS cname, c.URL_NAME, b.NAME AS bname, LOWER(REPLACE(REPLACE(b.NAME,' ','-'),'/','-')) AS bUrl, CONCAT('http://partnercenter.localbazaar.com/image?imageId=',ip.IMAGE_NAME) AS pImage, CONCAT('http://partnercenter.localbazaar.com/image?imageId=',ist.IMAGE_NAME) AS sImage, ci.CITY_ID FROM store_products_t sp LEFT OUTER JOIN store_t s ON (sp.STORE_ID=s.STORE_ID) LEFT OUTER JOIN address_t a ON (a.ASSET_TYPE_ID=3 AND a.ASSET_ID=sp.STORE_ID) LEFT OUTER JOIN product_t p ON (p.PRODUCT_ID=sp.PRODUCT_ID) LEFT OUTER JOIN category_t c ON (c.CATEGORY_ID=p.CATEGORY_ID) LEFT OUTER JOIN brand_t b ON (b.BRAND_ID=p.BRAND_ID) LEFT OUTER JOIN image_t ip ON (ip.ASSET_ID=sp.PRODUCT_ID AND ip.ASSET_TYPE_ID=4 AND ip.IMAGE_TYPE_ID=0) LEFT OUTER JOIN image_t ist ON (ist.ASSET_ID=sp.STORE_ID AND ist.ASSET_TYPE_ID=3 AND ist.IMAGE_TYPE_ID=0) LEFT OUTER JOIN city_t ci ON (ci.NAME=a.CITY)">
<!-- Column-to-field mappings: `column` is the SQL result column (or alias), `name` is the Solr field. -->
<field column="STORE_PRODUCT_ID" name="storeProductId" />
<field column="STORE_ID" name="storeId" />
<field column="PRODUCT_ID" name="productId" />
<field column="MIN_PRICE" name="storeMinPrice" />
<field column="MAX_PRICE" name="storeMaxPrice" />
<field column="STORE_TYPE_ID" name="storeTypeId" />
<field column="BUY_X" name="buyX" />
<field column="GET_Y" name="getY" />
<field column="sname" name="storeName" />
<field column="sUrl" name="storeUrlName" />
<field column="sdesc" name="description" />
<field column="WEB_SITE_UTL" name="webSiteUrl" />
<field column="EMAIL" name="email" />
<field column="PHONE" name="phone" />
<field column="MOBILE" name="mobile" />
<field column="act" name="active" />
<field column="ADDRESS_ID" name="addressId" />
<field column="location" name="location" />
<field column="urlLoc" name="urlLocation" />
<field column="ADDRESS_LINE1" name="addressLine1" />
<field column="ADDRESS_LINE2" name="addressLine2" />
<field column="LATITUDE" name="latitude" />
<field column="LONGITUDE" name="longitude" />
<field column="zipcode" name="zipcode" />
<field column="LANDMARK" name="landmark" />
<field column="CITY" name="city" />
<field column="ll" name="latlong" />
<field column="pname" name="productName" />
<field column="purl" name="productUrlName" />
<field column="pdesc" name="productDescription" />
<field column="BRAND_ID" name="brandId" />
<field column="CATEGORY_ID" name="categoryId" />
<field column="GROUP_ID" name="groupId" />
<field column="AUTHOR" name="author" />
<field column="ISBN10" name="isbn10" />
<field column="ISBN13" name="isbn13" />
<field column="OLID" name="olid" />
<field column="EAN13" name="ean13" />
<field column="UPCA" name="upca" />
<field column="SKU" name="sku" />
<field column="LANGUAGE" name="language" />
<field column="FORMAT" name="format" />
<field column="PUBLISHER" name="publisher" />
<field column="SUBJECT" name="subject" />
<field column="cname" name="categoryName" />
<field column="URL_NAME" name="categoryUrlName" />
<field column="bname" name="brandName" />
<field column="bUrl" name="brandUrlName" />
<field column="pImage" name="productImage" />
<field column="sImage" name="storeImage" />
<field column="CITY_ID" name="cityId" />
<!--
  Cached child entities. Each query runs once and its rows are cached; the
  `where` attribute ("cacheColumn=parentEntity.COLUMN") tells
  CachedSqlEntityProcessor which cached column to match against which
  parent-row value.
  The attribute name must be lowercase `where`: XML attribute names are
  case-sensitive, so an uppercase `WHERE` is not read by DIH and the cache
  lookup would not be keyed on the parent row.
  NOTE(review): both property tables are held fully in memory by the cache,
  so the JVM heap must be sized for them.
-->
<entity name="specifications" query="select PRODUCT_ID, CONCAT(PROPERTY_NAME,':::',property_value) as specifications FROM product_properties_t " processor="CachedSqlEntityProcessor" where="PRODUCT_ID=domainStoreProduct.PRODUCT_ID" />
<entity name="storeProperties" query="select STORE_ID, CONCAT(PROPERTY_ID,':::',PROPERTY_VALUE) as storeProperties FROM store_properties_t " processor="CachedSqlEntityProcessor" where="STORE_ID=domainStoreProduct.STORE_ID" />
</entity>
</document>
</dataConfig>
答案 0 :(得分:2)
您可以尝试不同的事情:
- 调整 batchSize 属性。如果调整得当,可以提高数据源的性能。
- SELECT * 总是比只列出所需的列要慢(即使您需要所有列)。我建议使用 SELECT PRODUCT_ID, NAME, ... 来代替 *。
编辑:我建议把这个问题拆分成两个问题,因为现在带着旧答案来阅读你的新问题,实在让人困惑。
我认为你不能选择CachedSqlEntityProcessor放置缓存的位置(我认为它始终在内存中)。 8小时数据导入的问题在于,因为我们讨论的是大量记录,所以会使用大量查询(每个子实体都使用自己的查询)。
问题的解决方案是删除子实体,并在您的父实体中将子实体的查询添加为逗号分隔列表。我建议看一下this answer。
如果这样做,您的所有规范(例如)都将作为逗号分隔列表存储在一列中。然后,您可以使用Solr ScriptTransformer分割值并创建多个值。
这将查询的数量限制为1个大查询,并且还将限制RAM的使用,因为它将单独解析每个查询。我不知道性能会是什么,因为你必须单独解析每个实体。
如果这不起作用,我认为有一个更好的解决方案,而不是等待8小时才能完成数据导入。您不能指望Solr一下子就把所有数据都索引完。您可以尝试使用cronjob在夜间运行此任务。