# Source code for mbs.krx

import io
import struct
import numpy as np

from .io import load


class KRXFile(object):
    """Reader for a KRX binary file.

    The header is parsed once when the object is created; page data and
    the text block stored behind each page are read on demand.

    Parameters
    ----------
    fname
        File name, handed unchanged to ``load``.
    zip_fname : optional
        Forwarded to ``load`` as its ``zip_fname`` keyword.

    Attributes
    ----------
    map_size : tuple of int or numpy.ndarray
        Map dimensions.  A tuple when the file carries a dimension block,
        a one-element array holding the page count otherwise.
    page_start : numpy.ndarray
        Start of every page, counted in 4-byte items from the beginning
        of the file.
    page_shape : numpy.ndarray
        Array of shape ``(pages, 2)`` with the two stored dimensions of
        every page.
    """

    # Page data and the metadata length field are 4-byte integers.  The
    # byte order is pinned to little-endian to agree with the explicit '<'
    # used for the header; on little-endian hosts this equals 'int32'.
    _ITEM_DTYPE = np.dtype('<i4')

    def __init__(self, fname, zip_fname=None):
        self.fname = fname
        self.zip_fname = zip_fname
        self._read_header()

    def __repr__(self):
        return f"KRXFile({self.fname})"

    def _read_header(self):
        """Open the file through ``load`` and store the parsed header."""
        with load(self.fname, zip_fname=self.zip_fname, mode='rb') as f:
            self.map_size, self.page_start, self.page_shape = \
                self._parse_header(f)

    @staticmethod
    def _parse_header(f):
        """Parse the header from a seekable binary stream.

        Layout, all integers little-endian:

        1. one integer ``count``;
        2. ``count`` integers forming rows of three:
           ``(start, dim0, dim1)`` for each page;
        3. two integers ``(flag, length)``;
        4. only if ``flag`` is non-zero: ``length`` integers, of which the
           entries from index 2 onwards are the map size.

        Parameters
        ----------
        f
            Binary stream positioned anywhere; it is rewound to offset 0.

        Returns
        -------
        tuple
            ``(map_size, page_start, page_shape)``.

        Raises
        ------
        ValueError
            If the product of the map size differs from ``count // 3``.
        struct.error
            If the stream ends before the header is complete.
        """
        # Width detection: read the first 8 bytes as two 4-byte integers.
        # If either is zero the header is taken to use 8-byte integers
        # (presumably because the upper half of a small 64-bit count is
        # zero -- the format itself is not documented here).
        probe = struct.unpack('<2i', f.read(8))
        code, width = ('i', 4) if all(probe) else ('q', 8)
        f.seek(0)

        def read_ints(count):
            return struct.unpack(f'<{count}{code}', f.read(count * width))

        (n_entries,) = read_ints(1)
        table = read_ints(n_entries)
        dims_flag, dims_len = read_ints(2)

        if dims_flag != 0:
            # Dimension block present: skip its first two entries, the
            # remainder is the map size.
            map_size = read_ints(dims_len)[2:]
        else:
            # No dimension block: treat the pages as a 1-D map.
            map_size = np.array([n_entries // 3])

        n_pages = n_entries // 3
        if np.prod(map_size) != n_pages:
            raise ValueError(
                f"inconsistent KRX header: map size "
                f"{[int(v) for v in map_size]} does not match "
                f"{n_pages} page table rows"
            )

        # int64 regardless of platform default, so that start * itemsize
        # cannot overflow a 32-bit integer for large files.
        table = np.array(table, dtype=np.int64).reshape(-1, 3)
        return map_size, table[:, 0], table[:, 1:]

    @property
    def num_pages(self):
        """Number of pages, i.e. the product of ``map_size``."""
        return np.prod(self.map_size)

    def _check_page_index(self, n):
        """Raise ``IndexError`` unless ``0 <= n < num_pages``."""
        if not 0 <= n < self.num_pages:
            raise IndexError(
                f"page index {n} out of range for {self.num_pages} pages"
            )

    def page(self, n=0):
        """Return the data of page ``n`` as a 2-D integer array.

        Parameters
        ----------
        n : int, optional
            Zero-based page index.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(page_shape[n][1], page_shape[n][0])``.  When
            ``load`` yields an ``io.BufferedReader`` the result is a
            ``numpy.memmap`` opened with ``mode='r'``; otherwise the bytes
            are read into memory first.

        Raises
        ------
        IndexError
            If ``n`` is outside ``0 <= n < num_pages``.
        """
        self._check_page_index(n)
        dt = self._ITEM_DTYPE
        offset = int(self.page_start[n]) * dt.itemsize
        dim0, dim1 = (int(v) for v in self.page_shape[n])

        with load(self.fname, zip_fname=self.zip_fname, mode='rb') as f:
            if isinstance(f, io.BufferedReader):
                # Plain file on disk: map it instead of copying.  Reversed
                # shape with Fortran order is the same memory layout as
                # the reshape-then-transpose used below.
                return np.memmap(f, mode='r', dtype=dt, offset=offset,
                                 shape=(dim1, dim0), order='F')
            # Zipped, gzipped, ... streams cannot be mapped; read the bytes.
            f.seek(offset)
            raw = f.read(dt.itemsize * dim0 * dim1)
        return np.frombuffer(raw, dtype=dt).reshape((dim0, dim1)).T

    def page_metadata(self, n=0):
        """Return the text block stored directly after page ``n``.

        The block consists of a 4-byte little-endian length followed by
        that many bytes, which are decoded as UTF-8.

        Parameters
        ----------
        n : int, optional
            Zero-based page index.

        Returns
        -------
        str

        Raises
        ------
        IndexError
            If ``n`` is outside ``0 <= n < num_pages``.
        """
        self._check_page_index(n)
        dt = self._ITEM_DTYPE
        # First item after the page data, counted in 4-byte items.
        page_end = int(self.page_start[n]) + int(np.prod(self.page_shape[n]))
        with load(self.fname, zip_fname=self.zip_fname, mode='rb') as f:
            f.seek(page_end * dt.itemsize)
            (text_len,) = struct.unpack('<i', f.read(dt.itemsize))
            return f.read(text_len).decode('utf8')

    def export_page_txt(self, out_fname, n=0):
        """Write the metadata text and data of page ``n`` to a text file.

        The metadata text is written first, followed by the page data as
        tab-separated integers via ``numpy.savetxt``.

        Parameters
        ----------
        out_fname
            Path of the file to create or overwrite.
        n : int, optional
            Zero-based page index.
        """
        # newline='' disables newline translation on write.
        with open(out_fname, 'w', newline='') as f:
            f.write(self.page_metadata(n))
            np.savetxt(f, self.page(n), delimiter='\t', fmt="%d")