From bf187d92e39e93e8e08719de8b08c57db530eed9 Mon Sep 17 00:00:00 2001 From: Pierre Langlois Date: Thu, 13 Jun 2019 16:07:21 +0100 Subject: [PATCH] gnu: Add csvkit. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * gnu/packages/wireservice.scm (csvkit): New variable. * gnu/packages/patches/csvkit-fix-tests.patch: New file. * gnu/local.mk (dist_patch_DATA): Add it. Signed-off-by: Ludovic Courtès --- gnu/local.mk | 1 + gnu/packages/patches/csvkit-fix-tests.patch | 45 +++++++++++++ gnu/packages/wireservice.scm | 73 +++++++++++++++++++++ 3 files changed, 119 insertions(+) create mode 100644 gnu/packages/patches/csvkit-fix-tests.patch diff --git a/gnu/local.mk b/gnu/local.mk index 0f4c1ce5c8..734676f45a 100644 --- a/gnu/local.mk +++ b/gnu/local.mk @@ -742,6 +742,7 @@ dist_patch_DATA = \ %D%/packages/patches/cpufrequtils-fix-aclocal.patch \ %D%/packages/patches/crawl-upgrade-saves.patch \ %D%/packages/patches/crda-optional-gcrypt.patch \ + %D%/packages/patches/csvkit-fix-tests.patch \ %D%/packages/patches/clucene-contribs-lib.patch \ %D%/packages/patches/cube-nocheck.patch \ %D%/packages/patches/cursynth-wave-rand.patch \ diff --git a/gnu/packages/patches/csvkit-fix-tests.patch b/gnu/packages/patches/csvkit-fix-tests.patch new file mode 100644 index 0000000000..cb9ec39cb0 --- /dev/null +++ b/gnu/packages/patches/csvkit-fix-tests.patch @@ -0,0 +1,45 @@ +diff --git a/tests/test_utilities/test_csvsql.py b/tests/test_utilities/test_csvsql.py +index e6ec4af..4f47980 100644 +--- a/tests/test_utilities/test_csvsql.py ++++ b/tests/test_utilities/test_csvsql.py +@@ -197,7 +197,7 @@ class TestCSVSQL(CSVKitTestCase, EmptyFileTests): + utility.run() + output = output_file.getvalue() + output_file.close() +- self.assertEqual(output, 'a,b,c\n1,2,3\n0,5,6\n') ++ self.assertEqual(output, 'a,b,c\n1,2.0,3.0\n0,5.0,6.0\n') + + def test_no_prefix_unique_constraint(self): + self.get_output(['--db', 'sqlite:///' + self.db_file, '--insert', 
'examples/dummy.csv', '--unique-constraint', 'a']) +diff --git a/tests/test_utilities/test_sql2csv.py b/tests/test_utilities/test_sql2csv.py +index a0c3d3e..babcfd6 100644 +--- a/tests/test_utilities/test_sql2csv.py ++++ b/tests/test_utilities/test_sql2csv.py +@@ -121,23 +121,23 @@ class TestSQL2CSV(CSVKitTestCase, EmptyFileTests): + input_file.close() + + def test_unicode(self): +- expected = self.csvsql('examples/test_utf8.csv') ++ self.csvsql('examples/test_utf8.csv') + csv = self.get_output(['--db', 'sqlite:///' + self.db_file, '--query', 'select * from foo']) +- self.assertEqual(csv.strip(), expected) ++ self.assertEqual(csv.strip(), 'foo,bar,baz\n1.0,2.0,3\n4.0,5.0,ʤ') + + def test_no_header_row(self): + self.csvsql('examples/dummy.csv') + csv = self.get_output(['--db', 'sqlite:///' + self.db_file, '--no-header-row', '--query', 'select * from foo']) + + self.assertTrue('a,b,c' not in csv) +- self.assertTrue('1,2,3' in csv) ++ self.assertTrue('1,2.0,3.0' in csv) + + def test_linenumbers(self): + self.csvsql('examples/dummy.csv') + csv = self.get_output(['--db', 'sqlite:///' + self.db_file, '--linenumbers', '--query', 'select * from foo']) + + self.assertTrue('line_number,a,b,c' in csv) +- self.assertTrue('1,1,2,3' in csv) ++ self.assertTrue('1,1,2.0,3.0' in csv) + + def test_wildcard_on_sqlite(self): + self.csvsql('examples/iris.csv') diff --git a/gnu/packages/wireservice.scm b/gnu/packages/wireservice.scm index 78c2a4264d..ab86f21f2f 100644 --- a/gnu/packages/wireservice.scm +++ b/gnu/packages/wireservice.scm @@ -19,6 +19,7 @@ (define-module (gnu packages wireservice) #:use-module ((guix licenses) #:prefix license:) #:use-module (guix build-system python) + #:use-module (guix download) #:use-module (guix git-download) #:use-module (guix packages) #:use-module (gnu packages) @@ -191,3 +192,75 @@ for dbf files support to all @code{agate.Table} instances."))) (synopsis "Add read support for Excel files (xls and xlsx) to agate") (description "@code{agateexcel} 
uses a monkey patching pattern to add read for xls and xlsx files support to all @code{agate.Table} instances."))) + +(define-public csvkit + (package + (name "csvkit") + (version "1.0.4") + (source (origin + (method url-fetch) + (uri (pypi-uri "csvkit" version)) + (sha256 + (base32 + "1830lb95rh1iyi3drlwxzb6y3pqkii0qiyzd40c1kvhvaf1s6lqk")) + (patches (search-patches "csvkit-fix-tests.patch")))) + (build-system python-build-system) + (native-inputs + `(("python-psycopg2" ,python-psycopg2) ;; Used to test PostgreSQL support. + ("python-sphinx" ,python-sphinx) + ("python-sphinx-rtd-theme" ,python-sphinx-rtd-theme))) + (inputs + `(("python-agate-dbf" ,python-agate-dbf) + ("python-agate-excel" ,python-agate-excel) + ("python-agate-sql" ,python-agate-sql) + ("python-six" ,python-six))) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'install 'install-docs + (lambda* (#:key outputs #:allow-other-keys) + (let* ((out (assoc-ref outputs "out")) + (man1 (string-append out "/share/man/man1"))) + (with-directory-excursion "docs" + (invoke "make" "man") + (copy-recursively "_build/man" man1)) + #t)))))) + (home-page "https://csvkit.rtfd.org") + (synopsis "Command-line tools for working with CSV") + (description "csvkit is a suite of command-line tools for converting to +and working with CSV. It provides the following commands: +@itemize +@item Input: + @itemize + @item @command{in2csv}: Convert various formats to CSV. + @item @command{sql2csv}: Execute SQL commands on a database and return the +data as CSV. + @end itemize +@item Processing: + @itemize + @item @command{csvclean}: Remove common syntax errors. + @item @command{csvcut}: Filter and truncate CSV files. + @item @command{csvgrep}: Filter tabular data to only those rows where +certain columns contain a given value or match a regular expression. + @item @command{csvjoin}: Merges two or more CSV tables together using a +method analogous to the SQL JOIN operation. 
+ @item @command{csvsort}: Sort CSV files. + @item @command{csvstack}: Stack up the rows from multiple CSV files, +optionally adding a grouping value to each row. + @end itemize +@item Output and analysis: + @itemize + @item @command{csvformat}: Convert a CSV file to a custom output format. + @item @command{csvjson}: Converts a CSV file into JSON or GeoJSON. + @item @command{csvlook}: Renders a CSV to the command line in a +Markdown-compatible, fixed-width format. + @item @command{csvpy}: Loads a CSV file into an @code{agate.csv.Reader} +object and then drops into a Python shell so the user can inspect the data +however they see fit. + @item @command{csvsql}: Generate SQL statements for a CSV file or execute +those statements directly on a database. + @item @command{csvstat}: Prints descriptive statistics for all columns in a +CSV file. + @end itemize +@end itemize") + (license license:expat)))