CSV文件如下所示
Stock
asin name quantity
----------------------------------
B01EI65LTI Blue Shoes 20
B079VGHTM2 Black Shoes 10
B07B33J5CK Green Shoes 3
Storage fees
asin currency estimated-monthly-storage-fee
----------------------------------------------------
B01EI65LTI EUR 0.7988
B079VGHTM2 GBP 0.4656
B07B33J5CK EUR 0.1264
Sales
amazon-order-id asin quantity item-price
----------------------------------------------------------
404-2135868-6928346 B01EI65LTI 1 59.95
402-0310960-9618709 B079VGHTM2 1 18.95
403-6094647-7799558 B07B33J5CK 1 14.95
目前,我的解决方案是为每个实体附加一个自增 ID,并在实体之间建立关联,例如库存(Stock)和仓储费(StorageFee):
/**
 * Stock level of a single product, identified by its ASIN.
 *
 * The asin column carries a (non-unique) index named asin_idx so rows can be
 * looked up by ASIN.
 *
 * @ORM\Entity(repositoryClass="App\Repository\StockRepository")
 * @ORM\Table(indexes={@ORM\Index(name="asin_idx", columns={"asin"})})
 */
class Stock
{
    /**
     * Auto-generated surrogate primary key.
     *
     * @ORM\Id()
     * @ORM\GeneratedValue()
     * @ORM\Column(type="integer")
     */
    private $id;

    /**
     * ASIN of the product (at most 16 characters).
     *
     * @ORM\Column(type="string", length=16)
     */
    private $asin;

    /**
     * Product name; presumably filled from the "name" column of the stock report.
     *
     * @ORM\Column(type="string", length=255)
     */
    private $productName;

    /**
     * Quantity held in the warehouse; presumably filled from the report's quantity column.
     *
     * @ORM\Column(type="integer")
     */
    private $afnWarehouseQuantity;

    /**
     * Storage fee associated with this stock row (joined through storageFeeId).
     *
     * The mapping annotations use the ORM alias like every other annotation in
     * this class; un-prefixed names cannot be resolved by the annotation reader
     * unless they are imported separately.
     *
     * @ORM\OneToOne(targetEntity="StorageFee")
     * @ORM\JoinColumn(name="storageFeeId", referencedColumnName="id")
     */
    private $storageFee;
}
/**
 * Estimated monthly storage fee for one ASIN.
 *
 * @ORM\Entity(repositoryClass="App\Repository\StorageFeeRepository")
 */
class StorageFee
{
/**
 * Auto-generated surrogate primary key.
 *
* @ORM\Id()
* @ORM\GeneratedValue()
* @ORM\Column(type="integer")
*/
private $id;
/**
 * ASIN the fee belongs to (at most 16 characters).
 *
 * @ORM\Column(type="string", length=16)
 */
private $asin;
/**
 * Currency of the fee (at most 8 characters); the sample report shows values such as EUR and GBP.
 *
 * @ORM\Column(type="string", length=8)
 */
private $currency;
/**
 * Estimated monthly storage fee amount.
 *
 * NOTE(review): stored as a float column; monetary amounts may lose precision
 * this way - confirm whether a decimal column would be more appropriate.
 *
 * @ORM\Column(type="float")
 */
private $estimatedMonthlyStorageFee;
}
然后“批量插入”实体:
class StockRepository extends ServiceEntityRepository
{
    /**
     * Imports stock rows as Stock entities, flushing in fixed-size batches.
     *
     * Every row is turned into a new Stock entity and persisted. After each
     * full batch the entity manager is flushed and cleared so the number of
     * managed entities stays bounded; a final flush writes the remainder.
     *
     * @param string $fileName CSV file to import (consumed by the elided reading code)
     */
    public function insertFromFile(string $fileName)
    {
        $entityManager = $this->getEntityManager();

        // Turn off the DBAL SQL logger for the duration of this bulk import.
        $entityManager->getConnection()
            ->getConfiguration()->setSQLLogger(null);

        // read csv...
        // NOTE(review): the CSV parsing is elided in this snippet; $csv is
        // expected to be an iterable of rows keyed by 'asin', 'name' and 'quantity'.

        // Number of persisted entities per flush/clear cycle.
        $batchSize = 20;
        // Count of entities persisted so far; drives the batch boundary check.
        $numInsert = 0;

        foreach ($csv as $row) {
            $stock = (new Stock())
                ->setAsin($row['asin'])
                ->setName($row['name'])
                ->setQuantity($row['quantity'])
            ;
            $entityManager->persist($stock);

            // Increment first so the flush happens after every full batch,
            // not on the very first row.
            if ((++$numInsert % $batchSize) === 0) {
                $entityManager->flush();
                $entityManager->clear();
            }
        }

        $entityManager->flush(); // flush remaining
        $entityManager->clear();
    }
}
并在后续步骤中“批量更新”关联。
/**
 * Links Stock rows to StorageFee rows that share the same ASIN.
 *
 * Loads the id and asin of every StorageFee, then runs one DQL UPDATE per
 * fee that sets the storageFee association on all Stock rows with that ASIN.
 */
public function updateAssociation()
{
    $em = $this->getEntityManager();

    // Turn off the DBAL SQL logger before issuing the per-fee updates.
    $em->getConnection()->getConfiguration()->setSQLLogger(null);

    $feeRows = $em
        ->createQuery('SELECT sf.id, sf.asin FROM App\Entity\StorageFee sf')
        ->getResult();

    // One query object is reused for all rows; only its parameters change.
    $linkStock = $em->createQuery('UPDATE App\Entity\Stock s SET s.storageFee = :id WHERE s.asin = :asin');

    foreach ($feeRows as $feeRow) {
        $linkStock->setParameter('id', $feeRow['id']);
        $linkStock->setParameter('asin', $feeRow['asin']);
        $linkStock->execute();
    }
}
我已经尽力优化了,但这些文件很大(最多 35,000 行),脚本的运行时间仍然很长——20 到 40 秒。而且我认为,事后再设置关联看起来不太“专业”。但是,把插入和更新混在一起执行、运行时间超过 60 秒(?),同样是个问题。
也许我的思路本身就有问题。这里是否建议直接使用现有的 asin 作为主键?我以前从未用过字符串主键……而且对 Doctrine 也还不太熟悉。
任何建议我都会很高兴。 谢谢与问候
答案 0 :(得分:1)
感谢 Jakumi 的建议——我已经更新了代码,按正确的导入顺序一步完成插入和关联,速度快了一倍。(批量大小为 200 会导致更高的峰值内存使用量)
class StockRepository extends ServiceEntityRepository
{
    /**
     * Imports stock rows as Stock entities and links each one to its StorageFee in a single pass.
     *
     * The ids of all existing StorageFee rows are loaded up front into an
     * ASIN => id map. While importing, a row whose ASIN is in the map gets a
     * reference to that StorageFee; rows without a matching fee are stored
     * without an association. Entities are flushed and cleared in fixed-size
     * batches, followed by a final flush for the remainder.
     *
     * @param string $fileName CSV file to import (consumed by the elided reading code)
     */
    public function insertFromFile(string $fileName)
    {
        $entityManager = $this->getEntityManager();

        // Turn off the DBAL SQL logger for the duration of this bulk import.
        $entityManager->getConnection()
            ->getConfiguration()->setSQLLogger(null);

        // get storage fee ids
        $query = $entityManager
            ->createQuery(/** @lang DQL */'
SELECT sf.id, sf.asin
FROM App\Entity\StorageFee sf
');
        $map = [];
        foreach ($query->getResult() as $row) {
            $map[$row['asin']] = $row['id'];
        }

        // read csv...
        // NOTE(review): the CSV parsing is elided in this snippet; $csv is
        // expected to be an iterable of rows keyed by 'asin', 'name' and 'quantity'.

        // Number of persisted entities per flush/clear cycle.
        $batchSize = 200;
        // Count of entities persisted so far. Without it the modulo check would
        // run on an undefined variable and flush on every single row.
        $numInsert = 0;

        foreach ($csv as $row) {
            $stock = (new Stock())
                ->setAsin($row['asin'])
                ->setName($row['name'])
                ->setQuantity($row['quantity'])
            ;

            // add reference
            if (isset($map[$row['asin']])) {
                // A reference is fetched per row, so it stays valid even after
                // the entity manager was cleared by a previous batch.
                $storageFee = $entityManager->getReference('App\Entity\StorageFee', $map[$row['asin']]);
                $stock->setStorageFee($storageFee);
            }

            $entityManager->persist($stock);

            // Increment first so the flush happens after every full batch.
            if ((++$numInsert % $batchSize) === 0) {
                $entityManager->flush();
                $entityManager->clear();
            }
        }

        $entityManager->flush(); // flush remaining
        $entityManager->clear();
    }
}