ParserCSV.inc

  1. cis7 sites/all/modules/ulmus/feeds/libraries/ParserCSV.inc
  2. cle7 sites/all/modules/ulmus/feeds/libraries/ParserCSV.inc
  3. ecd7 sites/all/modules/ulmus/feeds/libraries/ParserCSV.inc
  4. elmsmedia7 sites/all/modules/ulmus/feeds/libraries/ParserCSV.inc
  5. harmony7 sites/all/modules/ulmus/feeds/libraries/ParserCSV.inc
  6. icor7 sites/all/modules/ulmus/feeds/libraries/ParserCSV.inc
  7. meedjum_blog7 sites/all/modules/ulmus/feeds/libraries/ParserCSV.inc
  8. mooc7 sites/all/modules/ulmus/feeds/libraries/ParserCSV.inc

Contains CSV Parser.

Functions in this file are independent of the Feeds specific implementation. Thanks to jpetso http://drupal.org/user/56020 for most of the code in this file.

Classes

Name | Description
ParserCSV | Functionality to parse CSV files into a two-dimensional array.
ParserCSVIterator | Iterator over the text lines of a file.

File

sites/all/modules/ulmus/feeds/libraries/ParserCSV.inc
View source
  1. <?php
  2. /**
  3. * @file
  4. * Contains CSV Parser.
  5. *
  6. * Functions in this file are independent of the Feeds specific implementation.
  7. * Thanks to jpetso http://drupal.org/user/56020 for most of the code in this
  8. * file.
  9. */
  /**
   * Text lines from file iterator.
   *
   * Opens a file for reading and exposes its lines, one per iteration step,
   * through the Iterator interface. Two extras go beyond that interface:
   * rewind() accepts an optional byte offset to start reading from, and
   * currentPos() reports the byte position of the file pointer after the
   * current line was read.
   */
  class ParserCSVIterator implements Iterator {
    /**
     * File handle returned by fopen(), or FALSE if the file could not be opened.
     */
    private $handle;

    /**
     * The line read most recently, or NULL when there is no current line.
     */
    private $currentLine;

    /**
     * File pointer position (ftell()) right after the current line was read.
     */
    private $currentPos;

    /**
     * Opens the file at $filepath for reading.
     *
     * @param $filepath
     *   Path (or stream URI) handed to fopen(). If opening fails the iterator
     *   is simply empty: valid() returns FALSE.
     */
    public function __construct($filepath) {
      $this->handle = fopen($filepath, 'r');
      $this->currentLine = NULL;
      $this->currentPos = NULL;
    }

    /**
     * Closes the file handle, if one was opened.
     */
    public function __destruct() {
      if ($this->handle) {
        fclose($this->handle);
      }
    }

    /**
     * Moves the file pointer to $pos and reads the line that starts there.
     *
     * @param $pos
     *   Byte offset to seek to. Defaults to 0, the start of the file.
     */
    #[\ReturnTypeWillChange]
    public function rewind($pos = 0) {
      if ($this->handle) {
        fseek($this->handle, $pos);
        $this->next();
      }
    }

    /**
     * Reads the next line from the file and makes it the current line.
     *
     * @return
     *   The line including its line ending, or NULL if the end of the file has
     *   been reached or no file is open.
     */
    #[\ReturnTypeWillChange]
    public function next() {
      if ($this->handle) {
        // fgets() returns FALSE when nothing is left to read, which can happen
        // even though feof() was still FALSE beforehand. Normalize that to NULL
        // so valid() does not report a bogus extra line.
        $line = feof($this->handle) ? FALSE : fgets($this->handle);
        $this->currentLine = ($line === FALSE) ? NULL : $line;
        $this->currentPos = ftell($this->handle);
        return $this->currentLine;
      }
    }

    /**
     * Tells whether there is a current line.
     */
    #[\ReturnTypeWillChange]
    public function valid() {
      return isset($this->currentLine);
    }

    /**
     * Returns the current line, or NULL if there is none.
     */
    #[\ReturnTypeWillChange]
    public function current() {
      return $this->currentLine;
    }

    /**
     * Returns the file pointer position recorded after reading the current line.
     */
    public function currentPos() {
      return $this->currentPos;
    }

    /**
     * Returns the iteration key, which is the constant string 'line'.
     */
    #[\ReturnTypeWillChange]
    public function key() {
      return 'line';
    }

  }
  /**
   * Functionality to parse CSV files into a two dimensional array.
   *
   * Configure the parser through the setters, then call parse() with a line
   * iterator. Parsing can be limited by time (setTimeout()) or by number of
   * rows (setLineLimit()); lastLinePos() then tells where to resume with
   * setStartByte().
   */
  class ParserCSV {
    /**
     * Column delimiter string.
     */
    private $delimiter;

    /**
     * Whether the first non-empty line is dropped instead of parsed.
     */
    private $skipFirstLine;

    /**
     * Array of column names used as row keys, or FALSE for numeric keys.
     */
    private $columnNames;

    /**
     * Parse time budget in milliseconds, or FALSE for no limit.
     */
    private $timeout;

    /**
     * Whether the last parse() call stopped because of the timeout.
     */
    private $timeoutReached;

    /**
     * Byte offset handed to the iterator's rewind() when parsing starts.
     */
    private $startByte;

    /**
     * Maximum number of rows per parse() call, 0 for no limit.
     */
    private $lineLimit;

    /**
     * Position where the last parse() call stopped early, 0 otherwise.
     */
    private $lastLinePos;

    /**
     * Sets up the defaults: comma delimiter, no skipping, no column names,
     * no timeout, start at byte 0 and no line limit.
     */
    public function __construct() {
      $this->delimiter = ',';
      $this->skipFirstLine = FALSE;
      $this->columnNames = FALSE;
      $this->timeout = FALSE;
      $this->timeoutReached = FALSE;
      $this->startByte = 0;
      $this->lineLimit = 0;
      $this->lastLinePos = 0;
    }

    /**
     * Set the column delimiter string.
     * By default, the comma (',') is used as delimiter.
     */
    public function setDelimiter($delimiter) {
      $this->delimiter = $delimiter;
    }

    /**
     * Set this to TRUE if the parser should skip the first line of the CSV text,
     * which might be desired if the first line contains the column names.
     * By default, this is set to FALSE and the first line is not skipped.
     */
    public function setSkipFirstLine($skipFirstLine) {
      $this->skipFirstLine = $skipFirstLine;
    }

    /**
     * Specify an array of column names if you know them in advance, or FALSE
     * (which is the default) to unset any prior column names. If no column names
     * are set, the parser will put each row into a simple numerically indexed
     * array. If column names are given, the parser will create arrays with
     * these column names as array keys instead.
     */
    public function setColumnNames($columnNames) {
      $this->columnNames = $columnNames;
    }

    /**
     * Define the time (in milliseconds) after which the parser stops parsing,
     * even if it has not yet finished processing the CSV data. If the timeout
     * has been reached before parsing is done, the parse() method will return
     * an incomplete list of rows - a single row will never be cut off in the
     * middle, though. By default, no timeout (@p $timeout == FALSE) is defined.
     *
     * You can check if the timeout has been reached by calling the
     * timeoutReached() method after parse() has been called.
     */
    public function setTimeout($timeout) {
      $this->timeout = $timeout;
    }

    /**
     * After calling the parse() method, determine if the timeout (set by the
     * setTimeout() method) has been reached.
     *
     * @deprecated Use lastLinePos() instead to determine whether a file has
     *   finished parsing.
     */
    public function timeoutReached() {
      return $this->timeoutReached;
    }

    /**
     * Define the number of lines to parse in one parsing operation.
     *
     * By default, all lines of a file are being parsed.
     */
    public function setLineLimit($lines) {
      $this->lineLimit = $lines;
    }

    /**
     * Get the byte number where the parser left off after last parse() call.
     *
     * @return
     *   0 if all lines or no line has been parsed, the byte position of where a
     *   timeout or the line limit has been reached otherwise. This position can
     *   be used to set the start byte for the next iteration after parse() has
     *   reached the timeout set with setTimeout() or the line limit set with
     *   setLineLimit().
     *
     * @see ParserCSV::setStartByte()
     */
    public function lastLinePos() {
      return $this->lastLinePos;
    }

    /**
     * Set the byte where file should be started to read.
     *
     * Useful when parsing a file in batches.
     *
     * @return
     *   The start byte that was just set.
     */
    public function setStartByte($start) {
      return $this->startByte = $start;
    }

    /**
     * Parse CSV files into a two dimensional array.
     *
     * Parsing starts at the byte set with setStartByte() and stops at the end
     * of the input, or earlier once the timeout or the line limit is hit.
     *
     * @param Iterator $lineIterator
     *   An Iterator object that yields line strings, e.g. ParserCSVIterator.
     *   Beyond the Iterator interface, this method passes a byte offset to
     *   rewind() and calls currentPos() on the object.
     *
     * @return
     *   Two dimensional array that contains the data in the CSV file. Rows are
     *   numerically indexed arrays, or keyed by column name if column names
     *   were set with setColumnNames().
     */
    public function parse(Iterator $lineIterator) {
      $skipLine = $this->skipFirstLine;
      $rows = array();
      $this->timeoutReached = FALSE;
      $this->lastLinePos = 0;
      // microtime(TRUE) yields seconds as a float; the timeout is documented in
      // milliseconds, so convert before adding.
      $maxTime = empty($this->timeout) ? FALSE : (microtime(TRUE) + $this->timeout / 1000);
      $linesParsed = 0;
      $delimiterLength = strlen($this->delimiter);

      for ($lineIterator->rewind($this->startByte); $lineIterator->valid(); $lineIterator->next()) {
        // Make really sure we've got lines without trailing newlines.
        $line = trim((string) $lineIterator->current(), "\r\n");

        // Skip empty lines. Compare against '' explicitly: empty() would also
        // drop a line that consists of the single character "0".
        if ($line === '') {
          continue;
        }
        // If the first line contains column names, skip it.
        if ($skipLine) {
          $skipLine = FALSE;
          continue;
        }

        // The actual parser. explode() is unfortunately not suitable because the
        // delimiter might be located inside a quoted field, and that would break
        // the field and/or require additional effort to re-join the fields.
        $quoted = FALSE;
        $currentIndex = 0;
        $currentField = '';
        $fields = array();

        // We must use strlen() as we're parsing byte by byte using strpos(), so
        // drupal_strlen() will not work properly.
        while ($currentIndex <= strlen($line)) {
          if ($quoted) {
            $nextQuoteIndex = strpos($line, '"', $currentIndex);

            if ($nextQuoteIndex === FALSE) {
              // There's a line break before the quote is closed, so fetch the
              // next line and start from there.
              $currentField .= substr($line, $currentIndex);
              $lineIterator->next();

              if (!$lineIterator->valid()) {
                // Whoa, an unclosed quote! Well whatever, let's just ignore
                // that shortcoming and record it nevertheless.
                $fields[] = $currentField;
                break;
              }
              // Ok, so, on with fetching the next line, as mentioned above.
              $currentField .= "\n";
              $line = trim((string) $lineIterator->current(), "\r\n");
              $currentIndex = 0;
              continue;
            }

            // There's actually another quote in this line...
            // find out whether it's escaped or not.
            $currentField .= substr($line, $currentIndex, $nextQuoteIndex - $currentIndex);

            if (isset($line[$nextQuoteIndex + 1]) && $line[$nextQuoteIndex + 1] === '"') {
              // Escaped quote, add a single one to the field and proceed quoted.
              $currentField .= '"';
              $currentIndex = $nextQuoteIndex + 2;
            }
            else {
              // End of the quoted section, close the quote and let the
              // $quoted == FALSE block finalize the field.
              $quoted = FALSE;
              $currentIndex = $nextQuoteIndex + 1;
            }
          }
          else {
            // Not inside quotes: find out where the next character of interest
            // is. An empty delimiter can never match, so treat it as absent.
            $nextQuoteIndex = strpos($line, '"', $currentIndex);
            $nextDelimiterIndex = $delimiterLength > 0 ? strpos($line, $this->delimiter, $currentIndex) : FALSE;

            if ($nextQuoteIndex === FALSE) {
              $nextIndex = $nextDelimiterIndex;
            }
            elseif ($nextDelimiterIndex === FALSE) {
              $nextIndex = $nextQuoteIndex;
            }
            else {
              $nextIndex = min($nextQuoteIndex, $nextDelimiterIndex);
            }

            if ($nextIndex === FALSE) {
              // This line is done, add the rest of it as last field.
              $currentField .= substr($line, $currentIndex);
              $fields[] = $currentField;
              break;
            }
            elseif ($nextIndex === $nextDelimiterIndex) {
              // A delimiter comes next: the field ends right before it, and
              // reading resumes right after the whole delimiter, however many
              // bytes long it is.
              $currentField .= substr($line, $currentIndex, $nextIndex - $currentIndex);
              $fields[] = $currentField;
              $currentField = '';
              $currentIndex = $nextIndex + $delimiterLength;
              // Continue with the next field.
            }
            else {
              // A quote comes next: keep what precedes it and switch to quoted
              // mode, continuing this field in the $quoted == TRUE block.
              $quoted = TRUE;
              $currentField .= substr($line, $currentIndex, $nextIndex - $currentIndex);
              $currentIndex = $nextIndex + 1;
            }
          }
        }
        // End of CSV parser. We've now got all the fields of the line as strings
        // in the $fields array.
        if (empty($this->columnNames)) {
          $row = $fields;
        }
        else {
          // Key the fields by column name; missing trailing fields become ''.
          $row = array();
          foreach ($this->columnNames as $columnName) {
            $field = array_shift($fields);
            $row[$columnName] = isset($field) ? $field : '';
          }
        }
        $rows[] = $row;

        // Quit parsing if timeout has been reached or requested lines have been
        // reached. In both cases remember where to resume.
        if ($maxTime !== FALSE && microtime(TRUE) > $maxTime) {
          $this->timeoutReached = TRUE;
          $this->lastLinePos = $lineIterator->currentPos();
          break;
        }
        $linesParsed++;
        if ($this->lineLimit && $linesParsed >= $this->lineLimit) {
          $this->lastLinePos = $lineIterator->currentPos();
          break;
        }
      }
      return $rows;
    }

  }
Error | ELMSLN API

Error

×

Error message

  • Warning: Cannot modify header information - headers already sent by (output started at /var/www/html/elmsln_community/api.elmsln.org/includes/common.inc:2791) in drupal_send_headers() (line 1499 of /var/www/html/elmsln_community/api.elmsln.org/includes/bootstrap.inc).
  • Error: Call to undefined function apc_delete() in DrupalAPCCache->clear() (line 289 of /var/www/html/elmsln_community/api.elmsln.org/sites/all/modules/apc/drupal_apc_cache.inc).
The website encountered an unexpected error. Please try again later.