def __shuffle_decode(self, a, dtype='int64'):
    '''
        Reverses the byte-shuffle filter applied to a binary buffer.

        The filtered buffer is interpreted as `itemsize` consecutive byte
        planes of equal length, where plane k holds the k-th byte of every
        element. Decoding interleaves the planes again, so that the bytes of
        each element become contiguous. The dataset readers select this
        decoder when the 'data_filter' attribute is 'shuffle'.

        Arguments:
            a     - the filtered data (any object exposing the buffer protocol);
            dtype - the element type; only its item size is used;

        Returns:
            The decoded data as a bytes object of the same length.

        Raises:
            ValueError - if the buffer length is not a multiple of the item size.
    '''
    import numpy

    itemsize = numpy.dtype(dtype).itemsize

    # View the input as raw bytes; the element count is taken from this view,
    # so the length is always expressed in bytes, whatever the input type is.
    raw = numpy.frombuffer(a, dtype='uint8')
    count, rest = divmod(raw.size, itemsize)
    if rest != 0:
        raise ValueError("Cannot unshuffle {} bytes: the length is not a multiple of the item size {} of type '{}'!".format(raw.size, itemsize, dtype))

    # Rows are byte planes; transposing puts the bytes of one element together.
    return raw.reshape(itemsize, count).T.tobytes()


def __bytedelta_decode(self, a, dtype='int64'):
    '''
        Reverses the bytedelta filter applied to a binary buffer.

        The filtered buffer is interpreted as `itemsize` consecutive byte
        planes of equal length, each of them storing byte-wise differences
        between subsequent entries. Decoding restores every plane with
        a running sum (modulo 256) and then interleaves the planes, so that
        the bytes of each element become contiguous. The dataset readers
        select this decoder when the 'data_filter' attribute is 'bytedelta'.

        Arguments:
            a     - the filtered data (any object exposing the buffer protocol);
            dtype - the element type; only its item size is used;

        Returns:
            The decoded data as a bytes object of the same length.

        Raises:
            ValueError - if the buffer length is not a multiple of the item size.
    '''
    import numpy

    itemsize = numpy.dtype(dtype).itemsize

    # View the input as raw bytes; the element count is taken from this view,
    # so the length is always expressed in bytes, whatever the input type is.
    raw = numpy.frombuffer(a, dtype='uint8')
    count, rest = divmod(raw.size, itemsize)
    if rest != 0:
        raise ValueError("Cannot decode {} bytes of bytedelta data: the length is not a multiple of the item size {} of type '{}'!".format(raw.size, itemsize, dtype))

    planes = raw.reshape(itemsize, count)

    # Accumulate in uint8 on purpose: the sums have to wrap around modulo 256
    # to undo the byte-wise differences. Each plane is restored independently.
    restored = numpy.cumsum(planes, axis=-1, dtype='uint8')

    # Transposing puts the bytes of one element together again.
    return restored.T.tobytes()