Skip to content

Commit 18610b9

Browse files
p-mongo and p committed
Fix RUBY-2210 GridFS does not write data correctly when given String in UTF-8 encoding (#1858)
shallow_symbolize_keys replaced by expanded version

Co-authored-by: Oleg Pudeyev <[email protected]>
1 parent f634476 commit 18610b9

File tree

5 files changed

+79
-16
lines changed

5 files changed

+79
-16
lines changed

Diff for: lib/mongo/grid/file.rb

+5
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,11 @@ def inspect
104104
# chunk objects and assemble the data. If we have an IO object, then
105105
# it's the original file data and we must split it into chunks and set
106106
# the original data itself.
107+
#
108+
# @param [ IO, String, Array<BSON::Document> ] value The file object,
109+
# file contents or chunk documents.
110+
#
111+
# @return [ Array<Grid::File::Chunk> ] Array of chunks.
107112
def initialize_chunks!(value)
108113
if value.is_a?(Array)
109114
@chunks = value.map{ |doc| Chunk.new(doc) }

Diff for: lib/mongo/grid/file/chunk.rb

+2
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ class << self
151151
# @return [ String ] The assembled data.
152152
#
153153
# @since 2.0.0
154+
# @api private
154155
def assemble(chunks)
155156
chunks.reduce(''){ |data, chunk| data << chunk.data.data }
156157
end
@@ -167,6 +168,7 @@ def assemble(chunks)
167168
# @return [ Array<Chunk> ] The chunks of the data.
168169
#
169170
# @since 2.0.0
171+
# @api private
170172
def split(io, file_info, offset = 0)
171173
io = StringIO.new(io) if io.is_a?(String)
172174
parts = Enumerator.new { |y| y << io.read(file_info.chunk_size) until io.eof? }

Diff for: lib/mongo/grid/fs_bucket.rb

+15 -13
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def initialize(database, options = {})
177177
#
178178
# @since 2.0.0
179179
def prefix
180-
@options[:fs_name] || @options[:bucket_name]|| DEFAULT_ROOT
180+
@options[:fs_name] || @options[:bucket_name] || DEFAULT_ROOT
181181
end
182182

183183
# Remove a single file from the GridFS.
@@ -230,7 +230,8 @@ def delete(id)
230230
#
231231
# @since 2.1.0
232232
def open_download_stream(id, options = nil)
233-
read_stream(id, options).tap do |stream|
233+
options = Hash[(options || {}).map { |k, v| [k.to_sym, v] }]
234+
read_stream(id, **options).tap do |stream|
234235
if block_given?
235236
begin
236237
yield stream
@@ -348,15 +349,15 @@ def download_to_stream_by_name(filename, io, opts = {})
348349
download_to_stream(open_download_stream_by_name(filename, opts).file_id, io)
349350
end
350351

351-
# Opens an upload stream to GridFS to which the contents of a user file came be written.
352+
# Opens an upload stream to GridFS to which the contents of a file or
353+
# blob can be written.
352354
#
353-
# @example Open a stream to which the contents of a file came be written.
354-
# fs.open_upload_stream('a-file.txt')
355-
#
356-
# @param [ String ] filename The filename of the file to upload.
355+
# @param [ String ] filename The name of the file in GridFS.
357356
# @param [ Hash ] opts The options for the write stream.
358357
#
359-
# @option opts [ Object ] :file_id An optional unique file id. An ObjectId is generated otherwise.
358+
# @option opts [ Object ] :file_id An optional unique file id.
359+
# A BSON::ObjectId is automatically generated if a file id is not
360+
# provided.
360361
# @option opts [ Integer ] :chunk_size Override the default chunk size.
361362
# @option opts [ Hash ] :metadata User data for the 'metadata' field of the files
362363
# collection document.
@@ -375,7 +376,8 @@ def download_to_stream_by_name(filename, io, opts = {})
375376
#
376377
# @since 2.1.0
377378
def open_upload_stream(filename, opts = {})
378-
write_stream(filename, opts).tap do |stream|
379+
opts = Hash[opts.map { |k, v| [k.to_sym, v] }]
380+
write_stream(filename, **opts).tap do |stream|
379381
if block_given?
380382
begin
381383
yield stream
@@ -462,12 +464,12 @@ def write_concern
462464
#
463465
# @option opts [ BSON::Document ] :file_info_doc For internal
464466
# driver use only. A BSON document to use as file information.
465-
def read_stream(id, opts = nil)
466-
Stream.get(self, Stream::READ_MODE, { file_id: id }.update(options).update(opts || {}))
467+
def read_stream(id, **opts)
468+
Stream.get(self, Stream::READ_MODE, { file_id: id }.update(options).update(opts))
467469
end
468470

469-
def write_stream(filename, opts)
470-
Stream.get(self, Stream::WRITE_MODE, { filename: filename }.merge!(options).merge!(opts))
471+
def write_stream(filename, **opts)
472+
Stream.get(self, Stream::WRITE_MODE, { filename: filename }.update(options).update(opts))
471473
end
472474

473475
def chunks_name

Diff for: lib/mongo/grid/stream/write.rb

+9 -3
Original file line numberDiff line numberDiff line change
@@ -82,20 +82,26 @@ def initialize(fs, options)
8282
@open = true
8383
end
8484

85-
# Write to the GridFS bucket from the source stream.
85+
# Write to the GridFS bucket from the source stream or a string.
8686
#
8787
# @example Write to GridFS.
8888
# stream.write(io)
8989
#
90-
# @param [ IO ] io The source io stream to upload from.
90+
# @param [ String | IO ] io The string or IO object to upload from.
9191
#
9292
# @return [ Stream::Write ] self The write stream itself.
9393
#
9494
# @since 2.1.0
9595
def write(io)
9696
ensure_open!
9797
@indexes ||= ensure_indexes!
98-
@length += io.size
98+
@length += if io.respond_to?(:bytesize)
99+
# String objects
100+
io.bytesize
101+
else
102+
# IO objects
103+
io.size
104+
end
99105
chunks = File::Chunk.split(io, file_info, @n)
100106
@n += chunks.size
101107
chunks_collection.insert_many(chunks) unless chunks.empty?

Diff for: spec/integration/grid_fs_bucket_spec.rb

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
require 'spec_helper'
2+
3+
describe 'GridFS bucket integration' do
4+
let(:fs) do
5+
authorized_client.database.fs
6+
end
7+
8+
describe 'UTF-8 string write' do
9+
let(:data) { "hello\u2210" }
10+
11+
before do
12+
data.length.should_not == data.bytesize
13+
end
14+
15+
shared_examples 'round-trips' do
16+
it 'round-trips' do
17+
stream = fs.open_upload_stream('test') do |stream|
18+
stream.write(data_to_write)
19+
end
20+
21+
actual = nil
22+
fs.open_download_stream(stream.file_id) do |stream|
23+
actual = stream.read
24+
end
25+
26+
actual.encoding.name.should == 'ASCII-8BIT'
27+
actual.should == data.dup.force_encoding('binary')
28+
end
29+
end
30+
31+
context 'in binary encoding' do
32+
let(:data_to_write) do
33+
data.force_encoding('binary').freeze
34+
end
35+
36+
it_behaves_like 'round-trips'
37+
end
38+
39+
context 'in UTF-8 encoding' do
40+
let(:data_to_write) do
41+
data.encoding.name.should == 'UTF-8'
42+
data.freeze
43+
end
44+
45+
it_behaves_like 'round-trips'
46+
end
47+
end
48+
end

0 commit comments

Comments
 (0)