archon/dataframe

Archon:PHP数据分析库

v1.1.1 2020-06-06 23:33 UTC

This package is not auto-updated.

Last update: 2024-09-23 07:16:13 UTC


README

Build Status Coverage Status Latest Stable Version License

Archon是一个PHP库,旨在使处理表格/关系数据、文件和数据库变得简单。库的核心组件是DataFrame类 - 一种表格数据结构,在处理表格、二维数据时提高了抽象级别。

安装

使用Composer

composer require archon/dataframe
{
    "require": {
        "archon/dataframe": "1.1.1"
    }
}

需求

  • PHP 7.1或更高版本
  • php_pdo_sqlite扩展
  • php_mbstring扩展

依赖关系

许可证

数据格式示例

从数组实例化

$df = DataFrame::fromArray([
    ['a' => 1, 'b' => 2, 'c' => 3],
    ['a' => 4, 'b' => 5, 'c' => 6],
    ['a' => 7, 'b' => 8, 'c' => 9],
]);

读取CSV文件

x|y|z
1|2|3
4|5|6
7|8|9
$df = DataFrame::fromCSV($fileName, [
    'sep' => '|',
    'colmap' => [
	    'x' => 'a',
        'y' => 'b',
        'z' => 'c'
    ]
]);

写入CSV文件

$df->toCSV($fileName);
"a","b","c"
"1","2","3"
"4","5","6"
"7","8","9"

读取固定宽度的文件

foo bar baz
-----------
1   2   3
4   5   6
7   8   9
$df = DataFrame::fromFWF($fileName, [
	'a' => [0, 1],
    'b' => [4, 5],
    'c' => [8, 9]
], ['include' => '^[0-9]']);

读取XLSX电子表格

$dfA = DataFrame::fromXLSX($fileName, ['sheetname' => 'Sheet A']);
$dfB = DataFrame::fromXLSX($fileName, ['sheetname' => 'Sheet B']);
$dfC = DataFrame::fromXLSX($fileName, ['sheetname' => 'Sheet C']);

写入XLSX电子表格

$phpExcel = new PHPExcel();
$dfA->toXLSXWorksheet($phpExcel, 'Sheet A');
$dfB->toXLSXWorksheet($phpExcel, 'Sheet B');
$dfC->toXLSXWorksheet($phpExcel, 'Sheet C');
$writer = new PHPExcel_Writer_Excel2007($phpExcel);
$writer->save($fileName);

从数据库查询

$pdo = new PDO('sqlite::memory:');
$df = DataFrame::fromSQL('SELECT foo, bar, baz FROM table_name;', $pdo);

提交到数据库

$pdo = new PDO('sqlite::memory:');
$affected = $df->toSQL('table_name', $pdo);
echo sprintf('%d rows committed to database.', $affected);

显示HTML表格

$html = $df->toHTML(['class' => 'myclass', 'id' => 'myid']);

支持DataTables.js

$dataTable = $df->toHTML(['datatable' => '{ "optionKey": "optionValue" }']);

转换为JSON

$json = $df->toJSON();

从JSON创建

$df = DataFrame::fromJSON('[
    {"a": 1, "b": 2, "c": 3},
    {"a": 4, "b": 5, "c": 6},
    {"a": 7, "b": 8, "c": 9}
]');

提取底层二维数组

$myArray = $df->toArray();
print_r($myArray);
Array
(
    [0] => Array
        (
            [a] => 1
            [b] => 2
            [c] => 3
        )

    [1] => Array
        (
            [a] => 4
            [b] => 5
            [c] => 6
        )

    [2] => Array
        (
            [a] => 7
            [b] => 8
            [c] => 9
        )

)

基本操作

获取列名

$df->columns()
--------------
Array
(
    [0] => a
    [1] => b
    [2] => c
)

添加列

$df['key'] = 'value';

删除列

unset($df['key']);

计数行数

count($df);

遍历行

foreach ($df as $i => $row) {
   echo $i.': '.implode('-', $row).PHP_EOL; 
}
--------------------------
0: 1-2-3
1: 4-5-6
2: 7-8-9

高级操作

对行应用函数

$df = $df->apply(function ($row, $index) {
    $row['a'] = $row['c'] + 1;
    return $row;
});

直接对列应用函数

$df['a'] = function ($el, $key) {
    return $el + 3;
};

通过其他列的函数应用对列应用值

$df['a'] = $df['c']->apply(function ($el, $key) {
    return $el + 1;
});

应用类型

$df = DataFrame::fromArray([
    ['my_date'           => '11/20/16'],
    ['my_other_date'     => '2/12/2016'],
    ['my_decimal'        => '5,000.20'],
    ['my_int'            => '10-'],
    ['my_currency'       => '12345.67']
]);

$df->convertTypes([
    'my_date'       => 'DATE',
    'my_other_date' => 'DATE',
    'my_decimal'    => 'DECIMAL',
    'my_int'        => 'INT',
    'my_currency'   => 'CURRENCY'
], ['m/d/y', 'd/m/Y'], 'Y-m-d');

print_r($df->toArray());
Array
(
    [0] => Array
        (
            [my_date] => '2016-11-20'
            [my_other_date] => '2016-12-2'
            [my_decimal] => '5000.20'
            [my_int] => '-10'
            [my_currency] => '$12,345.67'
        )

)

使用SQL操作DataFrame

$df = DataFrame::fromArray([
    ['a' => 1, 'b' => 2, 'c' => 3],
    ['a' => 4, 'b' => 5, 'c' => 6],
    ['a' => 7, 'b' => 8, 'c' => 9],
]);

$df = $df->query("

SELECT
  a,
  b
FROM dataframe
WHERE a = '4'
  OR b = '2';

");

print_r($df->toArray());
Array
(
    [0] => Array
        (
            [a] => 1
            [b] => 2
        )

    [1] => Array
        (
            [a] => 4
            [b] => 5
        )

)
$df = DataFrame::fromArray([
    ['a' => 1, 'b' => 2, 'c' => 3],
    ['a' => 4, 'b' => 5, 'c' => 6],
    ['a' => 7, 'b' => 8, 'c' => 9],
]);

$df = $df->query("

UPDATE dataframe
SET a = c * 2;

");

print_r($df['a']->to_array());
Array
(
    [0] => Array
        (
            [a] => 6
        )

    [1] => Array
        (
            [a] => 12
        )

    [2] => Array
        (
            [a] => 18
        )

)