利用phpspreadsheet可以輕松的解析excel文件,但是phpspreadsheet的內(nèi)存消耗也是比較大的,我試過解析將近5m的純文字excel內(nèi)存使用量就會(huì)超過php默認(rèn)的最大內(nèi)存128m。
當(dāng)然這可以用調(diào)節(jié)內(nèi)存大小的方法來解決,但是在并發(fā)量大的時(shí)候就比較危險(xiǎn)了。所以今天介紹下一種方法,利用phpspreadsheet對excel文件進(jìn)行切割,這是個(gè)拿時(shí)間換空間的方法所以一般對時(shí)效性要求低的需求可以使用。
方法:
先放個(gè)phpspreadsheet官網(wǎng)提供的一個(gè)功能readcell,我們就可以利用這個(gè)功能來進(jìn)行切割。
首先對excel文件進(jìn)行預(yù)讀,主要是獲取所有的工作表以及工作表下面的數(shù)據(jù)行數(shù),這個(gè)階段readcell方法一直返回的都是false,我們只需要記錄readcell進(jìn)來的工作表及數(shù)據(jù)行數(shù)。
然后就是對獲取到的記錄進(jìn)行分析,確定每部分?jǐn)?shù)據(jù)需要裝多少行原始excel的數(shù)據(jù),需要注意的是為了避免內(nèi)容混淆,不要將兩個(gè)工作表的內(nèi)容切到一起。
最后就是循環(huán)分析的數(shù)據(jù)和再次利用readcell獲取每部分?jǐn)?shù)據(jù),注意每次讀取文件后都要利用disconnectworksheets方法清理phpspreadsheet的內(nèi)存。
經(jīng)過測試發(fā)現(xiàn),利用該方法解析5m的excel文件,平均只需要21m的內(nèi)存就可以搞定!
代碼
<?php
namespace cutexcel;
require_once 'phpspreadsheet/autoload.php';
/
* 預(yù)讀過濾類
* @author wangyelou
* @date 2018-07-30
*/
class myaheadreadfilter implements \phpoffice\phpspreadsheet\reader\ireadfilter
{
public $record = array();
private $lastrow = '';
public function readcell($column, $row, $worksheetname = '')
{
if (isset($this->record[$worksheetname]) ) {
if ($this->lastrow != $row) {
$this->record[$worksheetname] ++;
$this->lastrow = $row;
}
} else {
$this->record[$worksheetname] = 1;
$this->lastrow = $row;
}
return false;
}
}
/
* 解析過濾類
* @author wangyelou
* @date 2018-07-30
*/
class myreadfilter implements \phpoffice\phpspreadsheet\reader\ireadfilter
{
public $startrow;
public $endrow;
public $worksheetname;
public function readcell($column, $row, $worksheetname = '')
{
if ($worksheetname == $this->worksheetname && $row >= ($this->startrow+1) && $row <= ($this->endrow+1)) {
return true;
}
return false;
}
}
/
* 切割類
* @author wangyelou
* @date 2018-07-30
*/
class excelcut
{
public $cutnum = 5;
public $returntype = 'csv';
public $filedir = '/tmp/';
public $log;
/
* 切割字符串
* @param $str
* @return array|bool
*/
public function cutfromstr($str)
{
try {
$filepath = '/tmp/' . time() . mt_rand(1000, 9000) . $this->returntype;
file_put_contents($filepath, $str);
if (file_exists($filepath)) {
$result = $this->cutfromfile($filepath);
unlink($filepath);
return $result;
} else {
throw new exception('文件寫入錯(cuò)誤');
}
} catch (exception $e) {
$this->log = $e->getmessage();
return false;
}
}
/
* 切割文件
* @param $file
* @return array|bool
*/
public function cutfromfile($file)
{
try {
$cutrules = $this->readaheadfromfile($file);
$dir = $this->getfiledir($file);
$returntype = $this->returntype ? $this->returntype : 'csv';
$results = array();
//初始化讀
$myfilter = new myreadfilter();
$inputfiletype = \phpoffice\phpspreadsheet\iofactory::identify($file);
$reader = \phpoffice\phpspreadsheet\iofactory::createreader($inputfiletype);
$reader->setreaddataonly(true);
$reader->setreadfilter($myfilter);
foreach ($cutrules as $sheetname => $rowindexrange) {
//讀
list($myfilter->startrow, $myfilter->endrow, $myfilter->worksheetname) = $rowindexrange;
$spreadsheetreader = $reader->load($file);
$sheetdata = $spreadsheetreader->setactivesheetindexbyname($myfilter->worksheetname)->toarray(null, false, false, false);
$realdatas = array_splice($sheetdata, $myfilter->startrow, ($myfilter->endrow - $myfilter->startrow + 1));
$spreadsheetreader->disconnectworksheets();
unset($sheetdata);
unset($spreadsheetreader);
//寫
$savefile = $dir . $sheetname . '.' . $returntype;
$spreadsheetwriter = new \phpoffice\phpspreadsheet\spreadsheet();
foreach ($realdatas as $rowindex => $row) {
foreach ($row as $colindex => $col) {
$spreadsheetwriter->getactivesheet()->setcellvaluebycolumnandrow($colindex+1, $rowindex+1, $col);
}
}
$writer = \phpoffice\phpspreadsheet\iofactory::createwriter($spreadsheetwriter, $returntype);
$writer->save($savefile);
$spreadsheetwriter->disconnectworksheets();
unset($spreadsheetwriter);
$results[] = $savefile;
}
return $results;
} catch (exception $e) {
$this->log = $e->getmessage();
return false;
}
}
/
* 預(yù)讀文件
*/
public function readaheadfromfile($file)
{
if (file_exists($file)) {
//獲取統(tǒng)計(jì)數(shù)據(jù)
$myfilter = new myaheadreadfilter();
$inputfiletype = \phpoffice\phpspreadsheet\iofactory::identify($file);
$reader = \phpoffice\phpspreadsheet\iofactory::createreader($inputfiletype);
$reader->setreaddataonly(true); //只讀數(shù)據(jù)
$reader->setreadfilter($myfilter);
$spreadsheet = $reader->load($file);
//$sheetdata = $spreadsheet->getactivesheet()->toarray(null, false, false, false);
list($filename,) = explode('.', basename($file));
$datas = array();
$averagenum = ceil(array_sum($myfilter->record) / $this->cutnum);
foreach ($myfilter->record as $sheetname => $count) {
for ($i=0; $i<ceil($count/$averagenum); $i++) {
$datas[$filename . '_' . $sheetname . '_' . $i] = array($i*$averagenum, ($i+1)*$averagenum-1, $sheetname);
}
}
return $datas;
} else {
throw new exception($file . ' not exists');
}
}
/
* 創(chuàng)建目錄
* @param $file
* @return bool|string
*/
protected function getfiledir($file)
{
$basename = basename($file);
list($name) = explode('.', $basename);
$fullname = $name .'_'. time() . '_' . mt_rand(1000, 9999);
$path = $this->filedir . $fullname . '/';
mkdir($path, 0777);
chmod($path, 0777);
if (is_dir($path)) {
return $path;
} else {
$this->log = mkdir {$path} failed;
return false;
}
}
}
什么是企業(yè)云服務(wù)器配置域名未來投資趨勢:創(chuàng)意域名交易阿里云一個(gè)服務(wù)器多個(gè)域名備案服務(wù)器被CC攻擊了怎么辦中文網(wǎng)址在哪里注冊谷歌瀏覽器scholarscope如何安裝 谷歌瀏覽器安裝scholarscope的方法二手電商市場又起硝煙 京東重啟拍拍二手品牌免費(fèi)網(wǎng)址域名使用陷阱和突出問題