PYTHON: Add support for data filters for AmunXML format.

The supported data filters are 'shuffle' and 'bytedelta'.

Signed-off-by: Grzegorz Kowal <grzegorz@amuncode.org>
This commit is contained in:
Grzegorz Kowal 2023-07-24 12:34:16 -03:00
parent d9a535a432
commit 5245070499

View File

@ -224,6 +224,24 @@ class AmunXML(Amun):
print("File '{}' seems to be corrupted! Proceeding anyway...".format(filename)) print("File '{}' seems to be corrupted! Proceeding anyway...".format(filename))
def __shuffle_decode(self, a, dtype='int64'):
    '''
        Decodes a buffer stored with the 'shuffle' data filter.

        The buffer `a` is viewed as a sequence of single bytes arranged in
        `itemsize` rows, where `itemsize` is the byte width of `dtype`, and
        each row holds `len(a) // itemsize` bytes.  The rows are transposed,
        so the returned bytes contain, for each column in turn, one byte
        taken from every row.

        Arguments:
            a     - the bytes-like buffer to decode;
            dtype - the element type; only its item size is used here;

        Returns a new `bytes` object of the same length as `a`.
    '''
    import numpy

    width = numpy.dtype(dtype).itemsize
    count = len(a) // width

    # One row per byte position within an element.
    planes = numpy.frombuffer(a, dtype="int8").reshape((width, count))

    # Transposing interleaves the rows back into consecutive elements.
    return numpy.transpose(planes).tobytes()
def __bytedelta_decode(self, a, dtype='int64'):
    '''
        Decodes a buffer stored with the 'bytedelta' data filter.

        The buffer `a` is viewed as a sequence of single bytes arranged in
        `itemsize` rows, where `itemsize` is the byte width of `dtype`, and
        each row holds `len(a) // itemsize` bytes.  Every row is replaced
        by its running sum, accumulated in 8-bit signed arithmetic, and
        the rows are then transposed in the same way as for the 'shuffle'
        filter.

        Arguments:
            a     - the bytes-like buffer to decode;
            dtype - the element type; only its item size is used here;

        Returns a new `bytes` object of the same length as `a`.
    '''
    import numpy

    width = numpy.dtype(dtype).itemsize
    count = len(a) // width

    # One row of stored differences per byte position within an element.
    deltas = numpy.frombuffer(a, dtype="int8").reshape((width, count))

    # Accumulate along each row, keeping the sums in int8.
    planes = deltas.cumsum(axis=-1, dtype='int8')

    return planes.T.tobytes()
def __read_binary_meta(self, dataset, dtype='int32'): def __read_binary_meta(self, dataset, dtype='int32'):
''' '''
Reads binary data of metadata. Reads binary data of metadata.
@ -254,6 +272,15 @@ class AmunXML(Amun):
else: else:
raise Exception("Binary file '{}' compressed in unsupported format {}!".format(fname, comp)) raise Exception("Binary file '{}' compressed in unsupported format {}!".format(fname, comp))
if 'data_filter' in self.binaries[dataset]:
data_filter = self.binaries[dataset]['data_filter']
if data_filter == 'bytedelta':
data = self.__bytedelta_decode(data, dtype=dtype)
elif data_filter == 'shuffle':
data = self.__shuffle_decode(data, dtype=dtype)
else:
raise Exception("Binary file '{}' processed using unsupported filter {}!".format(fname, data_filter))
if 'digest' in self.binaries[dataset]: if 'digest' in self.binaries[dataset]:
htype = self.binaries[dataset]['digest_type'] htype = self.binaries[dataset]['digest_type']
dhash = self.binaries[dataset]['digest'] dhash = self.binaries[dataset]['digest']
@ -302,6 +329,15 @@ class AmunXML(Amun):
else: else:
raise Exception("Binary file '{}' compressed in unsupported format {}!".format(fname, comp)) raise Exception("Binary file '{}' compressed in unsupported format {}!".format(fname, comp))
if 'data_filter' in self.chunks[chunk_number][dataset_name]:
data_filter = self.chunks[chunk_number][dataset_name]['data_filter']
if data_filter == 'bytedelta':
data = self.__bytedelta_decode(data, dtype=dtype)
elif data_filter == 'shuffle':
data = self.__shuffle_decode(data, dtype=dtype)
else:
raise Exception("Binary file '{}' processed using unsupported filter {}!".format(fname, data_filter))
if 'digest' in self.chunks[chunk_number][dataset_name]: if 'digest' in self.chunks[chunk_number][dataset_name]:
htype = self.chunks[chunk_number][dataset_name]['digest_type'] htype = self.chunks[chunk_number][dataset_name]['digest_type']
dhash = self.chunks[chunk_number][dataset_name]['digest'] dhash = self.chunks[chunk_number][dataset_name]['digest']