Skip to content

Commit 54cdae3

Browse files
authored
Merge pull request #49 from harryscholes/write-gzipped-csv
Add support for writing to gzipped CSV files
2 parents 518c1b8 + e2a5a14 commit 54cdae3

File tree

5 files changed

+54
-3
lines changed

5 files changed

+54
-3
lines changed

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,14 @@ using CSVFiles, DataFrames
2727
df = DataFrame(load("data.csv"))
2828
````
2929

30+
To read a gzipped CSV file into a ``DataFrame``:
31+
32+
````julia
33+
using CSVFiles, DataFrames
34+
35+
df = DataFrame(load(File(format"CSV", "data.csv.gz")))
36+
````
37+
3038
The call to ``load`` returns a ``struct`` that is an [IterableTables.jl](https://github.com/queryverse/IterableTables.jl) source, so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTables.jl](https://github.com/queryverse/IterableTables.jl). Here are some examples of materializing a CSV file into data structures that are not a ``DataFrame``:
3139

3240
````julia
@@ -87,6 +95,14 @@ save("output.csv", it)
8795
````
8896
This will work as long as ``it`` is any of the types supported as sources in [IterableTables.jl](https://github.com/queryverse/IterableTables.jl).
8997

98+
Gzip-compressed CSV files can be created by giving the output filename a ``.gz`` extension:
99+
100+
````julia
101+
using CSVFiles
102+
103+
save(File(format"CSV", "output.csv.gz"), df)
104+
````
105+
90106
One can also save into an arbitrary stream:
91107
````julia
92108
using CSVFiles

REQUIRE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ FileIO 1.0.1
88
HTTP 0.6.14
99
IterableTables 0.8.3
1010
TableShowUtils 0.1.1
11+
CodecZlib 0.5.2

src/CSVFiles.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
module CSVFiles
22

33
using TextParse, IteratorInterfaceExtensions, TableTraits, TableTraitsUtils,
4-
DataValues, FileIO, HTTP, TableShowUtils
4+
DataValues, FileIO, HTTP, TableShowUtils, CodecZlib
55
import IterableTables
66

77
export load, save, File, @format_str

src/csv_writer.jl

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,16 @@ end
6565
function _save(filename::AbstractString, data; delim=',', quotechar='"', escapechar='"', nastring="NA", header=true)
6666
isiterabletable(data) || error("Can't write this data to a CSV file.")
6767

68-
open(filename, "w") do io
69-
_save(io, data, delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header)
68+
ext = last(split(filename, '.'))
69+
70+
if ext == "gz" # Gzipped
71+
open(GzipCompressorStream, filename, "w") do io
72+
_save(io, data, delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header)
73+
end
74+
else
75+
open(filename, "w") do io
76+
_save(io, data, delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header)
77+
end
7078
end
7179
end
7280

test/runtests.jl

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,32 @@ end
146146
end
147147
end
148148

149+
@testset "Compression" begin
150+
data = [(Name="John",Age=34.,Children=2),(Name="Sally",Age=54.,Children=1),(Name="Jim",Age=23.,Children=0)]
151+
152+
@testset "CSV" begin
153+
output_filename = "output.csv.gz"
154+
try
155+
save(File(format"CSV", output_filename), data)
156+
reloaded_data = collect(load(File(format"CSV", output_filename)))
157+
@test reloaded_data == data
158+
finally
159+
rm(output_filename)
160+
end
161+
end
162+
163+
@testset "TSV" begin
164+
output_filename = "output.tsv.gz"
165+
try
166+
save(File(format"TSV", output_filename), data)
167+
reloaded_data = collect(load(File(format"TSV", output_filename)))
168+
@test reloaded_data == data
169+
finally
170+
rm(output_filename)
171+
end
172+
end
173+
end
174+
149175
@testset "show" begin
150176
x = load(joinpath(@__DIR__, "data.csv"))
151177

0 commit comments

Comments
 (0)