""" Generic helpers for LLVM code generation. """ import collections from contextlib import contextmanager import functools from llvmlite import ir from numba.core import utils, types, config, debuginfo import numba.core.datamodel bool_t = ir.IntType(1) int8_t = ir.IntType(8) int32_t = ir.IntType(32) intp_t = ir.IntType(utils.MACHINE_BITS) voidptr_t = int8_t.as_pointer() true_bit = bool_t(1) false_bit = bool_t(0) true_byte = int8_t(1) false_byte = int8_t(0) def as_bool_bit(builder, value): return builder.icmp_unsigned('!=', value, value.type(0)) def make_anonymous_struct(builder, values, struct_type=None): """ Create an anonymous struct containing the given LLVM *values*. """ if struct_type is None: struct_type = ir.LiteralStructType([v.type for v in values]) struct_val = struct_type(ir.Undefined) for i, v in enumerate(values): struct_val = builder.insert_value(struct_val, v, i) return struct_val def make_bytearray(buf): """ Make a byte array constant from *buf*. """ b = bytearray(buf) n = len(b) return ir.Constant(ir.ArrayType(ir.IntType(8), n), b) _struct_proxy_cache = {} def create_struct_proxy(fe_type, kind='value'): """ Returns a specialized StructProxy subclass for the given fe_type. """ cache_key = (fe_type, kind) res = _struct_proxy_cache.get(cache_key) if res is None: base = {'value': ValueStructProxy, 'data': DataStructProxy, }[kind] clsname = base.__name__ + '_' + str(fe_type) bases = (base,) clsmembers = dict(_fe_type=fe_type) res = type(clsname, bases, clsmembers) _struct_proxy_cache[cache_key] = res return res def copy_struct(dst, src, repl={}): """ Copy structure from *src* to *dst* with replacement from *repl*. """ repl = repl.copy() # copy data from src or use those in repl for k in src._datamodel._fields: v = repl.pop(k, getattr(src, k)) setattr(dst, k, v) # use remaining key-values in repl for k, v in repl.items(): setattr(dst, k, v) return dst class _StructProxy(object): """ Creates a `Structure` like interface that is constructed with information from DataModel instance. FE type must have a data model that is a subclass of StructModel. """ # The following class members must be overridden by subclass _fe_type = None def __init__(self, context, builder, value=None, ref=None): self._context = context self._datamodel = self._context.data_model_manager[self._fe_type] if not isinstance(self._datamodel, numba.core.datamodel.StructModel): raise TypeError( "Not a structure model: {0}".format(self._datamodel)) self._builder = builder self._be_type = self._get_be_type(self._datamodel) assert not is_pointer(self._be_type) outer_ref, ref = self._make_refs(ref) if ref.type.pointee != self._be_type: raise AssertionError("bad ref type: expected %s, got %s" % (self._be_type.as_pointer(), ref.type)) if value is not None: if value.type != outer_ref.type.pointee: raise AssertionError("bad value type: expected %s, got %s" % (outer_ref.type.pointee, value.type)) self._builder.store(value, outer_ref) self._value = ref self._outer_ref = outer_ref def _make_refs(self, ref): """ Return an (outer ref, value ref) pair. By default, these are the same pointers, but a derived class may override this. 
""" if ref is None: ref = alloca_once(self._builder, self._be_type, zfill=True) return ref, ref def _get_be_type(self, datamodel): raise NotImplementedError def _cast_member_to_value(self, index, val): raise NotImplementedError def _cast_member_from_value(self, index, val): raise NotImplementedError def _get_ptr_by_index(self, index): return gep_inbounds(self._builder, self._value, 0, index) def _get_ptr_by_name(self, attrname): index = self._datamodel.get_field_position(attrname) return self._get_ptr_by_index(index) def __getattr__(self, field): """ Load the LLVM value of the named *field*. """ if not field.startswith('_'): return self[self._datamodel.get_field_position(field)] else: raise AttributeError(field) def __setattr__(self, field, value): """ Store the LLVM *value* into the named *field*. """ if field.startswith('_'): return super(_StructProxy, self).__setattr__(field, value) self[self._datamodel.get_field_position(field)] = value def __getitem__(self, index): """ Load the LLVM value of the field at *index*. """ member_val = self._builder.load(self._get_ptr_by_index(index)) return self._cast_member_to_value(index, member_val) def __setitem__(self, index, value): """ Store the LLVM *value* into the field at *index*. """ ptr = self._get_ptr_by_index(index) value = self._cast_member_from_value(index, value) if value.type != ptr.type.pointee: if (is_pointer(value.type) and is_pointer(ptr.type.pointee) and value.type.pointee == ptr.type.pointee.pointee): # Differ by address-space only # Auto coerce it value = self._context.addrspacecast(self._builder, value, ptr.type.pointee.addrspace) else: raise TypeError("Invalid store of {value.type} to " "{ptr.type.pointee} in " "{self._datamodel} " "(trying to write member #{index})" .format(value=value, ptr=ptr, self=self, index=index)) self._builder.store(value, ptr) def __len__(self): """ Return the number of fields. """ return self._datamodel.field_count def _getpointer(self): """ Return the LLVM pointer to the underlying structure. """ return self._outer_ref def _getvalue(self): """ Load and return the value of the underlying LLVM structure. """ return self._builder.load(self._outer_ref) def _setvalue(self, value): """ Store the value in this structure. """ assert not is_pointer(value.type) assert value.type == self._be_type, (value.type, self._be_type) self._builder.store(value, self._value) class ValueStructProxy(_StructProxy): """ Create a StructProxy suitable for accessing regular values (e.g. LLVM values or alloca slots). """ def _get_be_type(self, datamodel): return datamodel.get_value_type() def _cast_member_to_value(self, index, val): return val def _cast_member_from_value(self, index, val): return val class DataStructProxy(_StructProxy): """ Create a StructProxy suitable for accessing data persisted in memory. """ def _get_be_type(self, datamodel): return datamodel.get_data_type() def _cast_member_to_value(self, index, val): model = self._datamodel.get_model(index) return model.from_data(self._builder, val) def _cast_member_from_value(self, index, val): model = self._datamodel.get_model(index) return model.as_data(self._builder, val) class Structure(object): """ A high-level object wrapping a alloca'ed LLVM structure, including named fields and attribute access. """ # XXX Should this warrant several separate constructors? 
class Structure(object):
    """
    A high-level object wrapping an alloca'ed LLVM structure, including
    named fields and attribute access.
    """

    # XXX Should this warrant several separate constructors?
    def __init__(self, context, builder, value=None, ref=None,
                 cast_ref=False):
        self._type = context.get_struct_type(self)
        self._context = context
        self._builder = builder
        if ref is None:
            self._value = alloca_once(builder, self._type, zfill=True)
            if value is not None:
                assert not is_pointer(value.type)
                assert value.type == self._type, (value.type, self._type)
                builder.store(value, self._value)
        else:
            assert value is None
            assert is_pointer(ref.type)
            if self._type != ref.type.pointee:
                if cast_ref:
                    ref = builder.bitcast(ref, self._type.as_pointer())
                else:
                    raise TypeError(
                        "mismatching pointer type: got %s, expected %s"
                        % (ref.type.pointee, self._type))
            self._value = ref

        self._namemap = {}
        self._fdmap = []
        self._typemap = []
        base = int32_t(0)
        for i, (k, tp) in enumerate(self._fields):
            self._namemap[k] = i
            self._fdmap.append((base, int32_t(i)))
            self._typemap.append(tp)

    def _get_ptr_by_index(self, index):
        ptr = self._builder.gep(self._value, self._fdmap[index],
                                inbounds=True)
        return ptr

    def _get_ptr_by_name(self, attrname):
        return self._get_ptr_by_index(self._namemap[attrname])

    def __getattr__(self, field):
        """
        Load the LLVM value of the named *field*.
        """
        if not field.startswith('_'):
            return self[self._namemap[field]]
        else:
            raise AttributeError(field)

    def __setattr__(self, field, value):
        """
        Store the LLVM *value* into the named *field*.
        """
        if field.startswith('_'):
            return super(Structure, self).__setattr__(field, value)
        self[self._namemap[field]] = value

    def __getitem__(self, index):
        """
        Load the LLVM value of the field at *index*.
        """
        return self._builder.load(self._get_ptr_by_index(index))

    def __setitem__(self, index, value):
        """
        Store the LLVM *value* into the field at *index*.
        """
        ptr = self._get_ptr_by_index(index)
        if ptr.type.pointee != value.type:
            fmt = ("Type mismatch: __setitem__(%d, ...) expected %r "
                   "but got %r")
            raise AssertionError(fmt % (index,
                                        str(ptr.type.pointee),
                                        str(value.type)))
        self._builder.store(value, ptr)

    def __len__(self):
        """
        Return the number of fields.
        """
        return len(self._namemap)

    def _getpointer(self):
        """
        Return the LLVM pointer to the underlying structure.
        """
        return self._value

    def _getvalue(self):
        """
        Load and return the value of the underlying LLVM structure.
        """
        return self._builder.load(self._value)

    def _setvalue(self, value):
        """
        Store the value in this structure.
        """
        assert not is_pointer(value.type)
        assert value.type == self._type, (value.type, self._type)
        self._builder.store(value, self._value)

    # __iter__ is derived by Python from __len__ and __getitem__
def alloca_once(builder, ty, size=None, name='', zfill=False):
    """Allocate stack memory at the entry block of the current function
    pointed to by ``builder``, with the llvm type ``ty``.  The optional
    ``size`` arg sets the number of elements to allocate; the default is 1.
    The optional ``name`` arg sets the symbol name inside the llvm IR for
    debugging.
    If ``zfill`` is set, fill the memory with zeros at the current
    use-site location.  Note that the memory is always zero-filled after
    the ``alloca`` at init-site (the entry block).
    """
    if isinstance(size, int):
        size = ir.Constant(intp_t, size)
    # suspend debug metadata emission else it links up python source lines
    # with alloca in the entry block as well as their actual location and
    # it makes the debug info "jump about".
    with debuginfo.suspend_emission(builder):
        with builder.goto_entry_block():
            ptr = builder.alloca(ty, size=size, name=name)
            # Always zero-fill at init-site.  This is safe.
            builder.store(ptr.type.pointee(None), ptr)
        # Also zero-fill at the use-site
        if zfill:
            builder.store(ptr.type.pointee(None), ptr)
        return ptr


def sizeof(builder, ptr_type):
    """Compute sizeof using GEP
    """
    null = ptr_type(None)
    offset = null.gep([int32_t(1)])
    return builder.ptrtoint(offset, intp_t)


def alloca_once_value(builder, value, name='', zfill=False):
    """
    Like alloca_once(), but passing a *value* instead of a type.  The
    type is inferred and the allocated slot is also initialized with the
    given value.
    """
    storage = alloca_once(builder, value.type, zfill=zfill)
    builder.store(value, storage)
    return storage


def insert_pure_function(module, fnty, name):
    """
    Insert a pure function (in the functional programming sense) in the
    given module.
    """
    fn = get_or_insert_function(module, fnty, name)
    fn.attributes.add("readonly")
    fn.attributes.add("nounwind")
    return fn


def get_or_insert_function(module, fnty, name):
    """
    Get the function named *name* with type *fnty* from *module*, or insert
    it if it doesn't exist.
    """
    fn = module.globals.get(name, None)
    if fn is None:
        fn = ir.Function(module, fnty, name)
    return fn


def get_or_insert_named_metadata(module, name):
    try:
        return module.get_named_metadata(name)
    except KeyError:
        return module.add_named_metadata(name)


def add_global_variable(module, ty, name, addrspace=0):
    unique_name = module.get_unique_name(name)
    return ir.GlobalVariable(module, ty, unique_name, addrspace)


def terminate(builder, bbend):
    bb = builder.basic_block
    if bb.terminator is None:
        builder.branch(bbend)


def get_null_value(ltype):
    return ltype(None)


def is_null(builder, val):
    null = get_null_value(val.type)
    return builder.icmp_unsigned('==', null, val)


def is_not_null(builder, val):
    null = get_null_value(val.type)
    return builder.icmp_unsigned('!=', null, val)


def if_unlikely(builder, pred):
    return builder.if_then(pred, likely=False)


def if_likely(builder, pred):
    return builder.if_then(pred, likely=True)


def ifnot(builder, pred):
    return builder.if_then(builder.not_(pred))


def increment_index(builder, val):
    """
    Increment an index *val*.
    """
    one = val.type(1)
    # We pass the "nsw" flag in the hope that LLVM understands the index
    # never changes sign.  Unfortunately this doesn't always work
    # (e.g. ndindex()).
    return builder.add(val, one, flags=['nsw'])


Loop = collections.namedtuple('Loop', ('index', 'do_break'))
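# Illustrative example (not part of the original helpers), assuming a plain
# llvmlite IRBuilder suffices here: alloca_once_value() places the alloca in
# the entry block even when the builder is positioned in a later block,
# which avoids stack growth when allocating inside loops.
def _example_alloca_once():
    mod = ir.Module(name="example_alloca")
    fn = ir.Function(mod, ir.FunctionType(intp_t, []), name="answer")
    builder = ir.IRBuilder(fn.append_basic_block("entry"))
    later = fn.append_basic_block("later")
    builder.branch(later)
    builder.position_at_end(later)
    # Requested here, but the alloca itself is emitted in "entry".
    slot = alloca_once_value(builder, intp_t(42))
    builder.ret(builder.load(slot))
    return mod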
@contextmanager
def for_range(builder, count, start=None, intp=None):
    """
    Generate LLVM IR for a for-loop in [start, count).
    *start* is equal to 0 by default.

    Yields a Loop namedtuple with the following members:
    - `index` is the loop index's value
    - `do_break` is a no-argument callable to break out of the loop
    """
    if intp is None:
        intp = count.type
    if start is None:
        start = intp(0)
    stop = count

    bbcond = builder.append_basic_block("for.cond")
    bbbody = builder.append_basic_block("for.body")
    bbend = builder.append_basic_block("for.end")

    def do_break():
        builder.branch(bbend)

    bbstart = builder.basic_block
    builder.branch(bbcond)

    with builder.goto_block(bbcond):
        index = builder.phi(intp, name="loop.index")
        pred = builder.icmp_signed('<', index, stop)
        builder.cbranch(pred, bbbody, bbend)

    with builder.goto_block(bbbody):
        yield Loop(index, do_break)
        # Update bbbody as a new basic block may have been activated
        bbbody = builder.basic_block
        incr = increment_index(builder, index)
        terminate(builder, bbcond)

    index.add_incoming(start, bbstart)
    index.add_incoming(incr, bbbody)

    builder.position_at_end(bbend)


@contextmanager
def for_range_slice(builder, start, stop, step, intp=None, inc=True):
    """
    Generate LLVM IR for a for-loop based on a slice.  Yields a
    (index, count) tuple where `index` is the slice index's value
    inside the loop, and `count` the iteration count.

    Parameters
    ----------
    builder : object
        Builder object
    start : int
        The beginning value of the slice
    stop : int
        The end value of the slice
    step : int
        The step value of the slice
    intp :
        The data type
    inc : boolean, optional
        Signals whether the step is positive (True) or negative (False).
    """
    if intp is None:
        intp = start.type

    bbcond = builder.append_basic_block("for.cond")
    bbbody = builder.append_basic_block("for.body")
    bbend = builder.append_basic_block("for.end")
    bbstart = builder.basic_block
    builder.branch(bbcond)

    with builder.goto_block(bbcond):
        index = builder.phi(intp, name="loop.index")
        count = builder.phi(intp, name="loop.count")
        if inc:
            pred = builder.icmp_signed('<', index, stop)
        else:
            pred = builder.icmp_signed('>', index, stop)
        builder.cbranch(pred, bbbody, bbend)

    with builder.goto_block(bbbody):
        yield index, count
        bbbody = builder.basic_block
        incr = builder.add(index, step)
        next_count = increment_index(builder, count)
        terminate(builder, bbcond)

    index.add_incoming(start, bbstart)
    index.add_incoming(incr, bbbody)
    count.add_incoming(ir.Constant(intp, 0), bbstart)
    count.add_incoming(next_count, bbbody)

    builder.position_at_end(bbend)


@contextmanager
def for_range_slice_generic(builder, start, stop, step):
    """
    A helper wrapper for for_range_slice().  This is a context manager which
    yields two for_range_slice()-alike context managers, the first for the
    positive step case, the second for the negative step case.

    Use:
        with for_range_slice_generic(...) as (pos_range, neg_range):
            with pos_range as (idx, count):
                ...
            with neg_range as (idx, count):
                ...
    """
    intp = start.type
    is_pos_step = builder.icmp_signed('>=', step, ir.Constant(intp, 0))

    pos_for_range = for_range_slice(builder, start, stop, step, intp,
                                    inc=True)
    neg_for_range = for_range_slice(builder, start, stop, step, intp,
                                    inc=False)

    @contextmanager
    def cm_cond(cond, inner_cm):
        with cond:
            with inner_cm as value:
                yield value

    with builder.if_else(is_pos_step, likely=True) as (then, otherwise):
        yield cm_cond(then, pos_for_range), cm_cond(otherwise, neg_for_range)
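# Illustrative example (not part of the original helpers): use for_range()
# to emit a loop summing 0..n-1, assuming only llvmlite.  The yielded
# Loop.index is the phi node driving the loop.
def _example_for_range():
    mod = ir.Module(name="example_for_range")
    fn = ir.Function(mod, ir.FunctionType(intp_t, [intp_t]), name="sum_to_n")
    builder = ir.IRBuilder(fn.append_basic_block("entry"))
    n, = fn.args
    acc = alloca_once_value(builder, intp_t(0))
    with for_range(builder, n) as loop:
        # Loop body: acc += index
        cur = builder.load(acc)
        builder.store(builder.add(cur, loop.index), acc)
    # After the with-block the builder is positioned at the loop exit.
    builder.ret(builder.load(acc))
    return mod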
@contextmanager
def loop_nest(builder, shape, intp, order='C'):
    """
    Generate a loop nest walking a N-dimensional array.
    Yields a tuple of N indices for use in the inner loop body,
    iterating over the *shape* space.

    If *order* is 'C' (the default), indices are incremented inside-out
    (i.e. (0,0), (0,1), (0,2), (1,0) etc.).
    If *order* is 'F', they are incremented outside-in
    (i.e. (0,0), (1,0), (2,0), (0,1) etc.).
    This has performance implications when walking an array as it impacts
    the spatial locality of memory accesses.
    """
    assert order in 'CF'
    if not shape:
        # 0-d array
        yield ()
    else:
        if order == 'F':
            _swap = lambda x: x[::-1]
        else:
            _swap = lambda x: x
        with _loop_nest(builder, _swap(shape), intp) as indices:
            assert len(indices) == len(shape)
            yield _swap(indices)


@contextmanager
def _loop_nest(builder, shape, intp):
    with for_range(builder, shape[0], intp=intp) as loop:
        if len(shape) > 1:
            with _loop_nest(builder, shape[1:], intp) as indices:
                yield (loop.index,) + indices
        else:
            yield (loop.index,)


def pack_array(builder, values, ty=None):
    """
    Pack a sequence of values in a LLVM array.  *ty* should be given if
    the array may be empty, in which case the type can't be inferred from
    the values.
    """
    n = len(values)
    if ty is None:
        ty = values[0].type
    ary = ir.ArrayType(ty, n)(ir.Undefined)
    for i, v in enumerate(values):
        ary = builder.insert_value(ary, v, i)
    return ary


def pack_struct(builder, values):
    """
    Pack a sequence of values into a LLVM struct.
    """
    structty = ir.LiteralStructType([v.type for v in values])
    st = structty(ir.Undefined)
    for i, v in enumerate(values):
        st = builder.insert_value(st, v, i)
    return st


def unpack_tuple(builder, tup, count=None):
    """
    Unpack an array or structure of values, return a Python list of values.
    """
    if count is None:
        # Assuming *tup* is an aggregate
        count = len(tup.type.elements)
    vals = [builder.extract_value(tup, i)
            for i in range(count)]
    return vals


def get_item_pointer(context, builder, aryty, ary, inds, wraparound=False,
                     boundscheck=False):
    # Set boundscheck=True for any pointer access that should be
    # boundschecked. do_boundscheck() will handle enabling or disabling the
    # actual boundschecking based on the user config.
    shapes = unpack_tuple(builder, ary.shape, count=aryty.ndim)
    strides = unpack_tuple(builder, ary.strides, count=aryty.ndim)
    return get_item_pointer2(context, builder, data=ary.data, shape=shapes,
                             strides=strides, layout=aryty.layout, inds=inds,
                             wraparound=wraparound, boundscheck=boundscheck)


def do_boundscheck(context, builder, ind, dimlen, axis=None):
    def _dbg():
        # Remove this when we figure out how to include this information
        # in the error message.
        if axis is not None:
            if isinstance(axis, int):
                printf(builder,
                       "debug: IndexError: index %d is out of bounds "
                       "for axis {} with size %d\n".format(axis),
                       ind, dimlen)
            else:
                printf(builder,
                       "debug: IndexError: index %d is out of bounds "
                       "for axis %d with size %d\n",
                       ind, axis, dimlen)
        else:
            printf(builder,
                   "debug: IndexError: index %d is out of bounds "
                   "for size %d\n",
                   ind, dimlen)

    msg = "index is out of bounds"
    out_of_bounds_upper = builder.icmp_signed('>=', ind, dimlen)
    with if_unlikely(builder, out_of_bounds_upper):
        if config.FULL_TRACEBACKS:
            _dbg()
        context.call_conv.return_user_exc(builder, IndexError, (msg,))
    out_of_bounds_lower = builder.icmp_signed('<', ind, ind.type(0))
    with if_unlikely(builder, out_of_bounds_lower):
        if config.FULL_TRACEBACKS:
            _dbg()
        context.call_conv.return_user_exc(builder, IndexError, (msg,))
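# Illustrative example (not part of the original helpers): pack_array() and
# unpack_tuple() are inverses at the IR level.  This sketch packs three i32
# constants into an LLVM array, unpacks them again and returns their sum.
def _example_pack_unpack():
    mod = ir.Module(name="example_pack_unpack")
    fn = ir.Function(mod, ir.FunctionType(int32_t, []), name="sum_packed")
    builder = ir.IRBuilder(fn.append_basic_block("entry"))
    ary = pack_array(builder, [int32_t(1), int32_t(2), int32_t(3)])
    elems = unpack_tuple(builder, ary)   # count inferred from the array type
    builder.ret(functools.reduce(builder.add, elems))
    return mod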
def get_item_pointer2(context, builder, data, shape, strides, layout, inds,
                      wraparound=False, boundscheck=False):
    # Set boundscheck=True for any pointer access that should be
    # boundschecked. do_boundscheck() will handle enabling or disabling the
    # actual boundschecking based on the user config.
    if wraparound:
        # Wraparound
        indices = []
        for ind, dimlen in zip(inds, shape):
            negative = builder.icmp_signed('<', ind, ind.type(0))
            wrapped = builder.add(dimlen, ind)
            selected = builder.select(negative, wrapped, ind)
            indices.append(selected)
    else:
        indices = inds
    if boundscheck:
        for axis, (ind, dimlen) in enumerate(zip(indices, shape)):
            do_boundscheck(context, builder, ind, dimlen, axis)

    if not indices:
        # Indexing with empty tuple
        return builder.gep(data, [int32_t(0)])
    intp = indices[0].type
    # Indexing code
    if layout in 'CF':
        steps = []
        # Compute steps for each dimension
        if layout == 'C':
            # C contiguous
            for i in range(len(shape)):
                last = intp(1)
                for j in shape[i + 1:]:
                    last = builder.mul(last, j)
                steps.append(last)
        elif layout == 'F':
            # F contiguous
            for i in range(len(shape)):
                last = intp(1)
                for j in shape[:i]:
                    last = builder.mul(last, j)
                steps.append(last)
        else:
            raise Exception("unreachable")

        # Compute index
        loc = intp(0)
        for i, s in zip(indices, steps):
            tmp = builder.mul(i, s)
            loc = builder.add(loc, tmp)
        ptr = builder.gep(data, [loc])
        return ptr
    else:
        # Any layout
        dimoffs = [builder.mul(s, i) for s, i in zip(strides, indices)]
        offset = functools.reduce(builder.add, dimoffs)
        return pointer_add(builder, data, offset)


def _scalar_pred_against_zero(builder, value, fpred, icond):
    nullval = value.type(0)
    if isinstance(value.type, (ir.FloatType, ir.DoubleType)):
        isnull = fpred(value, nullval)
    elif isinstance(value.type, ir.IntType):
        isnull = builder.icmp_signed(icond, value, nullval)
    else:
        raise TypeError("unexpected value type %s" % (value.type,))
    return isnull


def is_scalar_zero(builder, value):
    """
    Return a predicate representing whether *value* is equal to zero.
    """
    return _scalar_pred_against_zero(
        builder, value, functools.partial(builder.fcmp_ordered, '=='), '==')


def is_not_scalar_zero(builder, value):
    """
    Return a predicate representing whether a *value* is not equal to zero.
    (not exactly "not is_scalar_zero" because of nans)
    """
    return _scalar_pred_against_zero(
        builder, value, functools.partial(builder.fcmp_unordered, '!='), '!=')


def is_scalar_zero_or_nan(builder, value):
    """
    Return a predicate representing whether *value* is equal to either zero
    or NaN.
    """
    return _scalar_pred_against_zero(
        builder, value, functools.partial(builder.fcmp_unordered, '=='), '==')


is_true = is_not_scalar_zero
is_false = is_scalar_zero


def is_scalar_neg(builder, value):
    """
    Is *value* negative?  Assumes *value* is signed.
    """
    return _scalar_pred_against_zero(
        builder, value, functools.partial(builder.fcmp_ordered, '<'), '<')


def guard_null(context, builder, value, exc_tuple):
    """
    Guard against *value* being null or zero.  *exc_tuple* should be a
    (exception type, arguments...) tuple.
    """
    with builder.if_then(is_scalar_zero(builder, value), likely=False):
        exc = exc_tuple[0]
        exc_args = exc_tuple[1:] or None
        context.call_conv.return_user_exc(builder, exc, exc_args)


def guard_memory_error(context, builder, pointer, msg=None):
    """
    Guard against *pointer* being NULL (and raise a MemoryError).
    """
    assert isinstance(pointer.type, ir.PointerType), pointer.type
    exc_args = (msg,) if msg else ()
    with builder.if_then(is_null(builder, pointer), likely=False):
        context.call_conv.return_user_exc(builder, MemoryError, exc_args)


@contextmanager
def if_zero(builder, value, likely=False):
    """
    Execute the given block if the scalar value is zero.
    """
    with builder.if_then(is_scalar_zero(builder, value), likely=likely):
        yield


guard_zero = guard_null
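# Illustrative example (not part of the original helpers): is_scalar_zero()
# works uniformly for ints and floats; for floats it emits an *ordered*
# compare, so NaN is not reported as zero.  The sketch returns 1 if the
# double argument is exactly zero, else 0.
def _example_is_scalar_zero():
    mod = ir.Module(name="example_zero_test")
    double = ir.DoubleType()
    fn = ir.Function(mod, ir.FunctionType(int8_t, [double]), name="is_zero")
    builder = ir.IRBuilder(fn.append_basic_block("entry"))
    x, = fn.args
    pred = is_scalar_zero(builder, x)        # i1 predicate
    builder.ret(builder.zext(pred, int8_t))  # widen to a returnable i8
    return mod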
""" return isinstance(ltyp, ir.PointerType) def get_record_member(builder, record, offset, typ): pval = gep_inbounds(builder, record, 0, offset) assert not is_pointer(pval.type.pointee) return builder.bitcast(pval, typ.as_pointer()) def is_neg_int(builder, val): return builder.icmp_signed('<', val, val.type(0)) def gep_inbounds(builder, ptr, *inds, **kws): """ Same as *gep*, but add the `inbounds` keyword. """ return gep(builder, ptr, *inds, inbounds=True, **kws) def gep(builder, ptr, *inds, **kws): """ Emit a getelementptr instruction for the given pointer and indices. The indices can be LLVM values or Python int constants. """ name = kws.pop('name', '') inbounds = kws.pop('inbounds', False) assert not kws idx = [] for i in inds: if isinstance(i, int): # NOTE: llvm only accepts int32 inside structs, not int64 ind = int32_t(i) else: ind = i idx.append(ind) return builder.gep(ptr, idx, name=name, inbounds=inbounds) def pointer_add(builder, ptr, offset, return_type=None): """ Add an integral *offset* to pointer *ptr*, and return a pointer of *return_type* (or, if omitted, the same type as *ptr*). Note the computation is done in bytes, and ignores the width of the pointed item type. """ intptr = builder.ptrtoint(ptr, intp_t) if isinstance(offset, int): offset = intp_t(offset) intptr = builder.add(intptr, offset) return builder.inttoptr(intptr, return_type or ptr.type) def memset(builder, ptr, size, value): """ Fill *size* bytes starting from *ptr* with *value*. """ fn = builder.module.declare_intrinsic('llvm.memset', (voidptr_t, size.type)) ptr = builder.bitcast(ptr, voidptr_t) if isinstance(value, int): value = int8_t(value) builder.call(fn, [ptr, value, size, bool_t(0)]) def memset_padding(builder, ptr): """ Fill padding bytes of the pointee with zeros. """ # Load existing value val = builder.load(ptr) # Fill pointee with zeros memset(builder, ptr, sizeof(builder, ptr.type), 0) # Store value back builder.store(val, ptr) def global_constant(builder_or_module, name, value, linkage='internal'): """ Get or create a (LLVM module-)global constant with *name* or *value*. """ if isinstance(builder_or_module, ir.Module): module = builder_or_module else: module = builder_or_module.module data = add_global_variable(module, value.type, name) data.linkage = linkage data.global_constant = True data.initializer = value return data def divmod_by_constant(builder, val, divisor): """ Compute the (quotient, remainder) of *val* divided by the constant positive *divisor*. The semantics reflects those of Python integer floor division, rather than C's / LLVM's signed division and modulo. The difference lies with a negative *val*. """ assert divisor > 0 divisor = val.type(divisor) one = val.type(1) quot = alloca_once(builder, val.type) with builder.if_else(is_neg_int(builder, val)) as (if_neg, if_pos): with if_pos: # quot = val / divisor quot_val = builder.sdiv(val, divisor) builder.store(quot_val, quot) with if_neg: # quot = -1 + (val + 1) / divisor val_plus_one = builder.add(val, one) quot_val = builder.sdiv(val_plus_one, divisor) builder.store(builder.sub(quot_val, one), quot) # rem = val - quot * divisor # (should be slightly faster than a separate modulo operation) quot_val = builder.load(quot) rem_val = builder.sub(val, builder.mul(quot_val, divisor)) return quot_val, rem_val def cbranch_or_continue(builder, cond, bbtrue): """ Branch conditionally or continue. Note: a new block is created and builder is moved to the end of the new block. 
""" bbcont = builder.append_basic_block('.continue') builder.cbranch(cond, bbtrue, bbcont) builder.position_at_end(bbcont) return bbcont def memcpy(builder, dst, src, count): """ Emit a memcpy to the builder. Copies each element of dst to src. Unlike the C equivalent, each element can be any LLVM type. Assumes ------- * dst.type == src.type * count is positive """ # Note this does seem to be optimized as a raw memcpy() by LLVM # whenever possible... assert dst.type == src.type with for_range(builder, count, intp=count.type) as loop: out_ptr = builder.gep(dst, [loop.index]) in_ptr = builder.gep(src, [loop.index]) builder.store(builder.load(in_ptr), out_ptr) def _raw_memcpy(builder, func_name, dst, src, count, itemsize, align): size_t = count.type if isinstance(itemsize, int): itemsize = ir.Constant(size_t, itemsize) memcpy = builder.module.declare_intrinsic(func_name, [voidptr_t, voidptr_t, size_t]) is_volatile = false_bit builder.call(memcpy, [builder.bitcast(dst, voidptr_t), builder.bitcast(src, voidptr_t), builder.mul(count, itemsize), is_volatile]) def raw_memcpy(builder, dst, src, count, itemsize, align=1): """ Emit a raw memcpy() call for `count` items of size `itemsize` from `src` to `dest`. """ return _raw_memcpy(builder, 'llvm.memcpy', dst, src, count, itemsize, align) def raw_memmove(builder, dst, src, count, itemsize, align=1): """ Emit a raw memmove() call for `count` items of size `itemsize` from `src` to `dest`. """ return _raw_memcpy(builder, 'llvm.memmove', dst, src, count, itemsize, align) def muladd_with_overflow(builder, a, b, c): """ Compute (a * b + c) and return a (result, overflow bit) pair. The operands must be signed integers. """ p = builder.smul_with_overflow(a, b) prod = builder.extract_value(p, 0) prod_ovf = builder.extract_value(p, 1) s = builder.sadd_with_overflow(prod, c) res = builder.extract_value(s, 0) ovf = builder.or_(prod_ovf, builder.extract_value(s, 1)) return res, ovf def printf(builder, format, *args): """ Calls printf(). Argument `format` is expected to be a Python string. Values to be printed are listed in `args`. Note: There is no checking to ensure there is correct number of values in `args` and there type matches the declaration in the format string. """ assert isinstance(format, str) mod = builder.module # Make global constant for format string cstring = voidptr_t fmt_bytes = make_bytearray((format + '\00').encode('ascii')) global_fmt = global_constant(mod, "printf_format", fmt_bytes) fnty = ir.FunctionType(int32_t, [cstring], var_arg=True) # Insert printf() try: fn = mod.get_global('printf') except KeyError: fn = ir.Function(mod, fnty, name="printf") # Call ptr_fmt = builder.bitcast(global_fmt, cstring) return builder.call(fn, [ptr_fmt] + list(args)) def snprintf(builder, buffer, bufsz, format, *args): """Calls libc snprintf(buffer, bufsz, format, ...args) """ assert isinstance(format, str) mod = builder.module # Make global constant for format string cstring = voidptr_t fmt_bytes = make_bytearray((format + '\00').encode('ascii')) global_fmt = global_constant(mod, "snprintf_format", fmt_bytes) fnty = ir.FunctionType( int32_t, [cstring, intp_t, cstring], var_arg=True, ) # Actual symbol name of snprintf is different on win32. 
def snprintf(builder, buffer, bufsz, format, *args):
    """Calls libc snprintf(buffer, bufsz, format, ...args)
    """
    assert isinstance(format, str)
    mod = builder.module
    # Make global constant for format string
    cstring = voidptr_t
    fmt_bytes = make_bytearray((format + '\00').encode('ascii'))
    global_fmt = global_constant(mod, "snprintf_format", fmt_bytes)
    fnty = ir.FunctionType(
        int32_t, [cstring, intp_t, cstring], var_arg=True,
    )
    # Actual symbol name of snprintf is different on win32.
    symbol = 'snprintf'
    if config.IS_WIN32:
        symbol = '_' + symbol
    # Insert snprintf()
    try:
        fn = mod.get_global(symbol)
    except KeyError:
        fn = ir.Function(mod, fnty, name=symbol)
    # Call
    ptr_fmt = builder.bitcast(global_fmt, cstring)
    return builder.call(fn, [buffer, bufsz, ptr_fmt] + list(args))


def snprintf_stackbuffer(builder, bufsz, format, *args):
    """Similar to `snprintf()` but the buffer is stack allocated to size
    *bufsz*.

    Returns the buffer pointer as i8*.
    """
    assert isinstance(bufsz, int)
    spacety = ir.ArrayType(ir.IntType(8), bufsz)
    space = alloca_once(builder, spacety, zfill=True)
    buffer = builder.bitcast(space, voidptr_t)
    snprintf(builder, buffer, intp_t(bufsz), format, *args)
    return buffer


def normalize_ir_text(text):
    """
    Normalize the given string to a latin1 compatible encoding that is
    suitable for use in LLVM IR.
    """
    # Just re-encoding to latin1 is enough
    return text.encode('utf8').decode('latin1')


def hexdump(builder, ptr, nbytes):
    """Debug print the memory region in *ptr* to *ptr + nbytes*
    as hex.
    """
    bytes_per_line = 16
    nbytes = builder.zext(nbytes, intp_t)
    printf(builder, "hexdump p=%p n=%zu", ptr, nbytes)
    byte_t = ir.IntType(8)
    ptr = builder.bitcast(ptr, byte_t.as_pointer())
    # Loop to print the bytes in *ptr* as hex
    with for_range(builder, nbytes) as idx:
        div_by = builder.urem(idx.index, intp_t(bytes_per_line))
        do_new_line = builder.icmp_unsigned("==", div_by, intp_t(0))
        with builder.if_then(do_new_line):
            printf(builder, "\n")
        offset = builder.gep(ptr, [idx.index])
        val = builder.load(offset)
        printf(builder, " %02x", val)
    printf(builder, "\n")


def is_nonelike(ty):
    """Returns True if *ty* is None-like, i.e. None itself or a
    NoneType / Omitted Numba type.
    """
    return (
        ty is None or
        isinstance(ty, types.NoneType) or
        isinstance(ty, types.Omitted)
    )
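if __name__ == "__main__":
    # Illustrative smoke test for the example sketches above (not part of
    # the original module): print the LLVM IR of one generated module.
    print(_example_for_range())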