-rw-r--r-- | compiler/ccgexprs.nim                          |  22
-rw-r--r-- | compiler/liftdestructors.nim                   |   8
-rw-r--r-- | lib/core/typeinfo.nim                          |   4
-rw-r--r-- | lib/system/arc.nim                             |  47
-rw-r--r-- | lib/system/bitmasks.nim                        |   6
-rw-r--r-- | lib/system/deepcopy.nim                        |   2
-rw-r--r-- | lib/system/memalloc.nim                        |  81
-rw-r--r-- | lib/system/orc.nim                             |   2
-rw-r--r-- | lib/system/seqs_v2.nim                         |  15
-rw-r--r-- | tests/arc/thard_alignment.nim                  | 146
-rw-r--r-- | tests/collections/thardalignmentconstraint.nim |  17
11 files changed, 273 insertions, 77 deletions
diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim
index b6caec760..1eb6caeb3 100644
--- a/compiler/ccgexprs.nim
+++ b/compiler/ccgexprs.nim
@@ -1278,11 +1278,11 @@ proc rawGenNew(p: BProc, a: var TLoc, sizeExpr: Rope; needsInit: bool) =
   if optTinyRtti in p.config.globalOptions:
     if needsInit:
-      b.r = ropecg(p.module, "($1) #nimNewObj($2)",
-          [getTypeDesc(p.module, typ), sizeExpr])
+      b.r = ropecg(p.module, "($1) #nimNewObj($2, NIM_ALIGNOF($3))",
+          [getTypeDesc(p.module, typ), sizeExpr, getTypeDesc(p.module, bt)])
     else:
-      b.r = ropecg(p.module, "($1) #nimNewObjUninit($2)",
-          [getTypeDesc(p.module, typ), sizeExpr])
+      b.r = ropecg(p.module, "($1) #nimNewObjUninit($2, NIM_ALIGNOF($3))",
+          [getTypeDesc(p.module, typ), sizeExpr, getTypeDesc(p.module, bt)])
     genAssignment(p, a, b, {})
   else:
     let ti = genTypeInfoV1(p.module, typ, a.lode.info)
@@ -2191,14 +2191,10 @@ proc genDestroy(p: BProc; n: PNode) =
       of tySequence:
         var a: TLoc
         initLocExpr(p, arg, a)
-        if optThreads in p.config.globalOptions:
-          linefmt(p, cpsStmts, "if ($1.p && !($1.p->cap & NIM_STRLIT_FLAG)) {$n" &
-            " #deallocShared($1.p);$n" &
-            "}$n", [rdLoc(a)])
-        else:
-          linefmt(p, cpsStmts, "if ($1.p && !($1.p->cap & NIM_STRLIT_FLAG)) {$n" &
-            " #dealloc($1.p);$n" &
-            "}$n", [rdLoc(a)])
+        linefmt(p, cpsStmts, "if ($1.p && !($1.p->cap & NIM_STRLIT_FLAG)) {$n" &
+          " #alignedDealloc($1.p, NIM_ALIGNOF($2));$n" &
+          "}$n",
+          [rdLoc(a), getTypeDesc(p.module, t.lastSon)])
       else: discard "nothing to do"
     else:
       let t = n[1].typ.skipTypes(abstractVar)
@@ -2217,7 +2213,7 @@ proc genDispose(p: BProc; n: PNode) =
       if elemType.destructor != nil:
         var destroyCall = newNodeI(nkCall, n.info)
         genStmts(p, destroyCall)
-      lineCg(p, cpsStmts, ["#nimRawDispose($#)", rdLoc(a)])
+      lineFmt(p, cpsStmts, "#nimRawDispose($1, NIM_ALIGNOF($2))", [rdLoc(a), getTypeDesc(p.module, elemType)])
     else:
       # ``nimRawDisposeVirtual`` calls the ``finalizer`` which is the same as the
       # destructor, but it uses the runtime type. Afterwards the memory is freed:
diff --git a/compiler/liftdestructors.nim b/compiler/liftdestructors.nim
index 30094875c..9b6c179db 100644
--- a/compiler/liftdestructors.nim
+++ b/compiler/liftdestructors.nim
@@ -515,7 +515,9 @@ proc atomicRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
     if isFinal(elemType):
       addDestructorCall(c, elemType, actions, genDeref(x, nkDerefExpr))
-      actions.add callCodegenProc(c.g, "nimRawDispose", c.info, x)
+      var alignOf = genBuiltin(c.g, mAlignOf, "alignof", newNodeIT(nkType, c.info, elemType))
+      alignOf.typ = getSysType(c.g, c.info, tyInt)
+      actions.add callCodegenProc(c.g, "nimRawDispose", c.info, x, alignOf)
     else:
       addDestructorCall(c, elemType, newNodeI(nkStmtList, c.info), genDeref(x, nkDerefExpr))
       actions.add callCodegenProc(c.g, "nimDestroyAndDispose", c.info, x)
@@ -638,7 +640,9 @@ proc ownedRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
     if isFinal(elemType):
       addDestructorCall(c, elemType, actions, genDeref(x, nkDerefExpr))
-      actions.add callCodegenProc(c.g, "nimRawDispose", c.info, x)
+      var alignOf = genBuiltin(c.g, mAlignOf, "alignof", newNodeIT(nkType, c.info, elemType))
+      alignOf.typ = getSysType(c.g, c.info, tyInt)
+      actions.add callCodegenProc(c.g, "nimRawDispose", c.info, x, alignOf)
     else:
       addDestructorCall(c, elemType, newNodeI(nkStmtList, c.info), genDeref(x, nkDerefExpr))
      actions.add callCodegenProc(c.g, "nimDestroyAndDispose", c.info, x)
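For orientation: `atomicRefOp` and `ownedRefOp` build the `=destroy` hook that the compiler lifts for `ref T`. For a final element type, the lifted code now amounts to the following sketch (illustrative only, not runnable user code — `nimDecRefIsLast` and `nimRawDispose` are internal runtime entry points, and `destroyRef` is a hypothetical name):

    # hand-written rendering of the lifted destroy hook for a final `ref T`
    proc destroyRef[T](x: ref T) =
      if nimDecRefIsLast(cast[pointer](x)):
        `=destroy`(x[])                              # destroy the fields first
        nimRawDispose(cast[pointer](x), alignof(T))  # then free, now alignment-aware

Non-final types still go through `nimDestroyAndDispose`, which fetches the alignment from the runtime type descriptor instead; see the `arc.nim` hunks below.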
diff --git a/lib/core/typeinfo.nim b/lib/core/typeinfo.nim
index 9fd8ebf8c..5c4e1b601 100644
--- a/lib/core/typeinfo.nim
+++ b/lib/core/typeinfo.nim
@@ -108,7 +108,7 @@ when not defined(gcDestructors):
   proc newSeq(typ: PNimType, len: int): pointer {.importCompilerProc.}
   proc objectInit(dest: pointer, typ: PNimType) {.importCompilerProc.}
 else:
-  proc nimNewObj(size: int): pointer {.importCompilerProc.}
+  proc nimNewObj(size, align: int): pointer {.importCompilerProc.}
   proc newSeqPayload(cap, elemSize, elemAlign: int): pointer {.importCompilerProc.}
   proc prepareSeqAdd(len: int; p: pointer; addlen, elemSize, elemAlign: int): pointer {.
     importCompilerProc.}
@@ -178,7 +178,7 @@ proc invokeNew*(x: Any) =
   ## performs ``new(x)``. `x` needs to represent a ``ref``.
   assert x.rawType.kind == tyRef
   when defined(gcDestructors):
-    cast[ppointer](x.value)[] = nimNewObj(x.rawType.base.size)
+    cast[ppointer](x.value)[] = nimNewObj(x.rawType.base.size, x.rawType.base.align)
   else:
     var z = newObj(x.rawType, x.rawType.base.size)
     genericAssign(x.value, addr(z), x.rawType)
diff --git a/lib/system/arc.nim b/lib/system/arc.nim
index 46579eaef..0eecadd66 100644
--- a/lib/system/arc.nim
+++ b/lib/system/arc.nim
@@ -75,14 +75,13 @@ when defined(nimArcDebug):
 elif defined(nimArcIds):
   var gRefId: int

-proc nimNewObj(size: int): pointer {.compilerRtl.} =
-  let s = size + sizeof(RefHeader)
+proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} =
+  let hdrSize = align(sizeof(RefHeader), alignment)
+  let s = size + hdrSize
   when defined(nimscript):
     discard
-  elif compileOption("threads"):
-    result = allocShared0(s) +! sizeof(RefHeader)
   else:
-    result = alloc0(s) +! sizeof(RefHeader)
+    result = alignedAlloc0(s, alignment) +! hdrSize
   when defined(nimArcDebug) or defined(nimArcIds):
     head(result).refId = gRefId
     atomicInc gRefId
@@ -92,20 +91,18 @@ proc nimNewObj(size: int): pointer {.compilerRtl.} =
   when traceCollector:
     cprintf("[Allocated] %p result: %p\n", result -! sizeof(RefHeader), result)

-proc nimNewObjUninit(size: int): pointer {.compilerRtl.} =
+proc nimNewObjUninit(size, alignment: int): pointer {.compilerRtl.} =
   # Same as 'newNewObj' but do not initialize the memory to zero.
   # The codegen proved for us that this is not necessary.
-  let s = size + sizeof(RefHeader)
+  let hdrSize = align(sizeof(RefHeader), alignment)
+  let s = size + hdrSize
   when defined(nimscript):
     discard
-  elif compileOption("threads"):
-    var orig = cast[ptr RefHeader](allocShared(s))
   else:
-    var orig = cast[ptr RefHeader](alloc(s))
-  orig.rc = 0
+    result = cast[ptr RefHeader](alignedAlloc0(s, alignment) +! hdrSize)
+  head(result).rc = 0
   when defined(gcOrc):
-    orig.rootIdx = 0
-  result = orig +! sizeof(RefHeader)
+    head(result).rootIdx = 0
   when defined(nimArcDebug):
     head(result).refId = gRefId
     atomicInc gRefId
@@ -147,7 +144,7 @@ when not defined(nimscript) and defined(nimArcDebug):
   else:
     result = 0

-proc nimRawDispose(p: pointer) {.compilerRtl.} =
+proc nimRawDispose(p: pointer, alignment: int) {.compilerRtl.} =
   when not defined(nimscript):
     when traceCollector:
       cprintf("[Freed] %p\n", p -! sizeof(RefHeader))
@@ -155,27 +152,21 @@ proc nimRawDispose(p: pointer) {.compilerRtl.} =
     if head(p).rc >= rcIncrement:
       cstderr.rawWrite "[FATAL] dangling references exist\n"
       quit 1
-
-  when defined(gcOrc) and defined(nimArcDebug):
-    if (head(p).rc and 0b100) != 0:
-      cstderr.rawWrite "[FATAL] cycle root freed\n"
-      quit 1
-
   when defined(nimArcDebug):
     # we do NOT really free the memory here in order to reliably detect use-after-frees
     if freedCells.data == nil: init(freedCells)
     freedCells.incl head(p)
-  elif compileOption("threads"):
-    deallocShared(p -! sizeof(RefHeader))
   else:
-    dealloc(p -! sizeof(RefHeader))
+    let hdrSize = align(sizeof(RefHeader), alignment)
+    alignedDealloc(p -! hdrSize, alignment)

-template dispose*[T](x: owned(ref T)) = nimRawDispose(cast[pointer](x))
+template dispose*[T](x: owned(ref T)) = nimRawDispose(cast[pointer](x), T.alignOf)
 #proc dispose*(x: pointer) = nimRawDispose(x)

 proc nimDestroyAndDispose(p: pointer) {.compilerRtl, raises: [].} =
-  let d = cast[ptr PNimTypeV2](p)[].destructor
-  if d != nil: cast[DestructorProc](d)(p)
+  let rti = cast[ptr PNimTypeV2](p)
+  if rti.destructor != nil:
+    cast[DestructorProc](rti.destructor)(p)
   when false:
     cstderr.rawWrite cast[ptr PNimTypeV2](p)[].name
     cstderr.rawWrite "\n"
@@ -183,7 +174,7 @@ proc nimDestroyAndDispose(p: pointer) {.compilerRtl, raises: [].} =
       cstderr.rawWrite "bah, nil\n"
     else:
       cstderr.rawWrite "has destructor!\n"
-  nimRawDispose(p)
+  nimRawDispose(p, rti.align)

 when defined(gcOrc):
   when defined(nimThinout):
@@ -216,7 +207,7 @@ proc GC_unref*[T](x: ref T) =
   if nimDecRefIsLast(cast[pointer](x)):
     # XXX this does NOT work for virtual destructors!
     `=destroy`(x[])
-    nimRawDispose(cast[pointer](x))
+    nimRawDispose(cast[pointer](x), T.alignOf)

 proc GC_ref*[T](x: ref T) =
   ## New runtime only supports this operation for 'ref T'.
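The invariant behind `nimNewObj` and `nimRawDispose` is that the `RefHeader` is padded up to the payload's alignment, so the user pointer sits a fixed, alignment-preserving distance `hdrSize` past the allocation base. A minimal stand-alone illustration (`alignUp` is a hypothetical stand-in for system's `align`; the 8-byte header size is an assumption for the example):

    proc alignUp(v, a: int): int = (v + a - 1) and not (a - 1)

    doAssert alignUp(8, 16) == 16  # an 8-byte RefHeader padded for a 16-byte payload
    doAssert alignUp(8, 32) == 32  # ... and for a 32-byte payload such as __m256d
    # nimNewObj returns base +! hdrSize from an `alignment`-aligned block; since
    # hdrSize is a multiple of `alignment`, the payload is aligned too, and
    # nimRawDispose can recover the allocation base as p -! hdrSize.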
diff --git a/lib/system/bitmasks.nim b/lib/system/bitmasks.nim
index 922ad5fb7..d7c55a4d9 100644
--- a/lib/system/bitmasks.nim
+++ b/lib/system/bitmasks.nim
@@ -15,7 +15,11 @@ const
   PageSize = 1 shl PageShift
   PageMask = PageSize-1

-  MemAlign = 16 # also minimal allocatable memory block
+  MemAlign = # also minimal allocatable memory block
+    when defined(useMalloc):
+      when defined(amd64): 16
+      else: 8
+    else: 16

   BitsPerPage = PageSize div MemAlign
   UnitsPerPage = BitsPerPage div (sizeof(int)*8)
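The `MemAlign` change records what the underlying allocator can actually promise: with `-d:useMalloc`, C's `malloc` must return memory suitably aligned for `max_align_t`, which is 16 bytes on typical 64-bit targets but only 8 on common 32-bit ones, while Nim's native allocator keeps its historical 16. A quick sanity check of that floor (an assumption-laden sketch; `MyMemAlign` is a local stand-in, and `alloc` here is whatever allocator the program was built with):

    const MyMemAlign = when defined(amd64): 16 else: 8  # mirrors the useMalloc branch
    let p = alloc(1)
    doAssert (cast[uint](p) and uint(MyMemAlign - 1)) == 0  # even tiny blocks keep MemAlign
    dealloc(p)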
diff --git a/lib/system/deepcopy.nim b/lib/system/deepcopy.nim
index b9dc594fa..1f30b8427 100644
--- a/lib/system/deepcopy.nim
+++ b/lib/system/deepcopy.nim
@@ -163,7 +163,7 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) =
       when defined(nimSeqsV2):
         let typ = if mt.base.kind == tyObject: cast[PNimType](cast[ptr PNimTypeV2](s2)[].typeInfoV1)
                   else: mt.base
-        let z = nimNewObj(typ.size)
+        let z = nimNewObj(typ.size, typ.align)
         cast[PPointer](dest)[] = z
       else:
         # this version should work for any other GC:
diff --git a/lib/system/memalloc.nim b/lib/system/memalloc.nim
index 142762fe7..5f4304502 100644
--- a/lib/system/memalloc.nim
+++ b/lib/system/memalloc.nim
@@ -304,6 +304,87 @@ when hasAlloc and not defined(js):
     ## or other memory may be corrupted.
     deallocShared(p)

+  include bitmasks
+
+  template `+!`(p: pointer, s: SomeInteger): pointer =
+    cast[pointer](cast[int](p) +% int(s))
+
+  template `-!`(p: pointer, s: SomeInteger): pointer =
+    cast[pointer](cast[int](p) -% int(s))
+
+  proc alignedAlloc(size, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = allocShared(size)
+      else:
+        result = alloc(size)
+    else:
+      # allocate (size + align - 1) necessary for alignment,
+      # plus 2 bytes to store offset
+      when compileOption("threads"):
+        let base = allocShared(size + align - 1 + sizeof(uint16))
+      else:
+        let base = alloc(size + align - 1 + sizeof(uint16))
+      # memory layout: padding + offset (2 bytes) + user_data
+      # in order to deallocate: read offset at user_data - 2 bytes,
+      # then deallocate user_data - offset
+      let offset = align - (cast[int](base) and (align - 1))
+      cast[ptr uint16](base +! (offset - sizeof(uint16)))[] = uint16(offset)
+      result = base +! offset
+
+  proc alignedAlloc0(size, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = allocShared0(size)
+      else:
+        result = alloc0(size)
+    else:
+      # see comments for alignedAlloc
+      when compileOption("threads"):
+        let base = allocShared0(size + align - 1 + sizeof(uint16))
+      else:
+        let base = alloc0(size + align - 1 + sizeof(uint16))
+      let offset = align - (cast[int](base) and (align - 1))
+      cast[ptr uint16](base +! (offset - sizeof(uint16)))[] = uint16(offset)
+      result = base +! offset
+
+  proc alignedDealloc(p: pointer, align: int) {.compilerproc.} =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        deallocShared(p)
+      else:
+        dealloc(p)
+    else:
+      # read offset at p - 2 bytes, then deallocate (p - offset) pointer
+      let offset = cast[ptr uint16](p -! sizeof(uint16))[]
+      when compileOption("threads"):
+        deallocShared(p -! offset)
+      else:
+        dealloc(p -! offset)
+
+  proc alignedRealloc(p: pointer, oldSize, newSize, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = reallocShared(p, newSize)
+      else:
+        result = realloc(p, newSize)
+    else:
+      result = alignedAlloc(newSize, align)
+      copyMem(result, p, oldSize)
+      alignedDealloc(p, align)
+
+  proc alignedRealloc0(p: pointer, oldSize, newSize, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = reallocShared0(p, oldSize, newSize)
+      else:
+        result = realloc0(p, oldSize, newSize)
+    else:
+      result = alignedAlloc(newSize, align)
+      copyMem(result, p, oldSize)
+      zeroMem(result +! oldSize, newSize - oldSize)
+      alignedDealloc(p, align)
+
 {.pop.}

 # GC interface:
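`alignedAlloc` makes each over-aligned block self-describing: it over-allocates by `align - 1` plus two bytes, rounds the base up to the requested boundary, and stores the distance back to the raw block in the `uint16` just below the returned pointer, which is all `alignedDealloc` needs. A compressed single-threaded sketch of the same trick (`myAlignedAlloc`/`myAlignedDealloc` are hypothetical names; it relies on `alloc` returning MemAlign-aligned memory, so for a power-of-two `align > MemAlign` the offset is always at least MemAlign and the two-byte slot fits):

    template `+!`(p: pointer, s: int): pointer = cast[pointer](cast[int](p) + s)
    template `-!`(p: pointer, s: int): pointer = cast[pointer](cast[int](p) - s)

    proc myAlignedAlloc(size, align: int): pointer =
      let base = alloc(size + align - 1 + sizeof(uint16))     # worst-case padding + offset slot
      let offset = align - (cast[int](base) and (align - 1))  # in 1..align; a multiple of MemAlign here
      cast[ptr uint16](base +! (offset - sizeof(uint16)))[] = uint16(offset)
      result = base +! offset

    proc myAlignedDealloc(p: pointer) =
      let offset = cast[ptr uint16](p -! sizeof(uint16))[]    # read back the stored distance
      dealloc(p -! int(offset))                               # free the original block

    let p = myAlignedAlloc(100, 64)
    doAssert (cast[uint](p) and 63) == 0                      # 64-byte aligned as requested
    myAlignedDealloc(p)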
diff --git a/lib/system/orc.nim b/lib/system/orc.nim
index 3c2327fd5..28f8e5808 100644
--- a/lib/system/orc.nim
+++ b/lib/system/orc.nim
@@ -88,7 +88,7 @@ proc free(s: Cell; desc: PNimTypeV2) {.inline.} =
     else:
       cstderr.rawWrite "has dispose!\n"

-  nimRawDispose(p)
+  nimRawDispose(p, desc.align)

 proc nimTraceRef(q: pointer; desc: PNimTypeV2; env: pointer) {.compilerRtl, inline.} =
   let p = cast[ptr pointer](q)
diff --git a/lib/system/seqs_v2.nim b/lib/system/seqs_v2.nim
index d83a0009a..b7f24ecd5 100644
--- a/lib/system/seqs_v2.nim
+++ b/lib/system/seqs_v2.nim
@@ -35,10 +35,7 @@ proc newSeqPayload(cap, elemSize, elemAlign: int): pointer {.compilerRtl, raises
   # we have to use type erasure here as Nim does not support generic
   # compilerProcs. Oh well, this will all be inlined anyway.
   if cap > 0:
-    when compileOption("threads"):
-      var p = cast[ptr NimSeqPayloadBase](allocShared0(align(sizeof(NimSeqPayloadBase), elemAlign) + cap * elemSize))
-    else:
-      var p = cast[ptr NimSeqPayloadBase](alloc0(align(sizeof(NimSeqPayloadBase), elemAlign) + cap * elemSize))
+    var p = cast[ptr NimSeqPayloadBase](alignedAlloc0(align(sizeof(NimSeqPayloadBase), elemAlign) + cap * elemSize, elemAlign))
     p.cap = cap
     result = p
   else:
@@ -65,20 +62,14 @@ proc prepareSeqAdd(len: int; p: pointer; addlen, elemSize, elemAlign: int): poin
     let oldCap = p.cap and not strlitFlag
     let newCap = max(resize(oldCap), len+addlen)
     if (p.cap and strlitFlag) == strlitFlag:
-      when compileOption("threads"):
-        var q = cast[ptr NimSeqPayloadBase](allocShared0(headerSize + elemSize * newCap))
-      else:
-        var q = cast[ptr NimSeqPayloadBase](alloc0(headerSize + elemSize * newCap))
+      var q = cast[ptr NimSeqPayloadBase](alignedAlloc0(headerSize + elemSize * newCap, elemAlign))
       copyMem(q +! headerSize, p +! headerSize, len * elemSize)
       q.cap = newCap
       result = q
     else:
       let oldSize = headerSize + elemSize * oldCap
       let newSize = headerSize + elemSize * newCap
-      when compileOption("threads"):
-        var q = cast[ptr NimSeqPayloadBase](reallocShared0(p, oldSize, newSize))
-      else:
-        var q = cast[ptr NimSeqPayloadBase](realloc0(p, oldSize, newSize))
+      var q = cast[ptr NimSeqPayloadBase](alignedRealloc0(p, oldSize, newSize, elemAlign))
       q.cap = newCap
       result = q
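Note how `newSeqPayload` sizes the block: `align(sizeof(NimSeqPayloadBase), elemAlign) + cap * elemSize` pads the capacity header to the element alignment, so once the whole payload is `elemAlign`-aligned (which `alignedAlloc0` guarantees), element 0 is too. A tiny numeric illustration (assuming an 8-byte `NimSeqPayloadBase` for the sake of the example; `alignUp` mirrors system's `align`):

    proc alignUp(v, a: int): int = (v + a - 1) and not (a - 1)

    doAssert alignUp(8, 8)  == 8   # ordinary elements: no padding needed
    doAssert alignUp(8, 32) == 32  # __m256d elements start 32 bytes in, on a 32-byte boundary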
diff --git a/tests/arc/thard_alignment.nim b/tests/arc/thard_alignment.nim
new file mode 100644
index 000000000..e644572f0
--- /dev/null
+++ b/tests/arc/thard_alignment.nim
@@ -0,0 +1,146 @@
+discard """
+disabled: "arm64"
+cmd: "nim c --gc:arc $file"
+output: "y"
+"""
+
+{.passC: "-march=native".}
+
+proc isAlignedCheck(p: pointer, alignment: int) =
+  doAssert (cast[uint](p) and uint(alignment - 1)) == 0
+
+proc isAlignedCheck[T](p: ref T, alignment: int) =
+  isAlignedCheck(cast[pointer](p), alignment)
+
+type
+  m256d {.importc: "__m256d", header: "immintrin.h".} = object
+
+proc set1(x: float): m256d {.importc: "_mm256_set1_pd", header: "immintrin.h".}
+func `+`(a,b: m256d): m256d {.importc: "_mm256_add_pd", header: "immintrin.h".}
+proc `$`(a: m256d): string =
+  result = $(cast[ptr float](a.unsafeAddr)[])
+
+
+var res: seq[seq[m256d]]
+
+for _ in 1..1000:
+  var x = newSeq[m256d](1)
+  x[0] = set1(1.0) # test if operation causes segfault
+  isAlignedCheck(x[0].addr, alignof(m256d))
+  res.add x
+
+var res2: seq[m256d]
+for i in 1..10000:
+  res2.setLen(res2.len + 1) # check if realloc works
+  isAlignedCheck(res2[0].addr, alignof(m256d))
+
+proc lambdaGen(a, b: float, z: ref m256d) : auto =
+  var x1 = new(m256d)
+  var x2 = new(m256d)
+  isAlignedCheck(x1, alignof(m256d))
+  isAlignedCheck(x2, alignof(m256d))
+  x1[] = set1(2.0 + a)
+  x2[] = set1(-23.0 - b)
+  let capturingLambda = proc(x: ref m256d): ref m256d =
+    var cc = new(m256d)
+    var bb = new(m256d)
+    isAlignedCheck(x1, alignof(m256d))
+    isAlignedCheck(x2, alignof(m256d))
+    isAlignedCheck(cc, alignof(m256d))
+    isAlignedCheck(bb, alignof(m256d))
+    isAlignedCheck(z, alignof(m256d))
+
+    cc[] = x1[] + x1[] + z[]
+    bb[] = x2[] + set1(12.5) + z[]
+
+    result = new(m256d)
+    isAlignedCheck(result, alignof(m256d))
+    result[] = cc[] + bb[] + x[]
+
+  return capturingLambda
+
+var xx = new(m256d)
+xx[] = set1(10)
+isAlignedCheck(xx, alignOf(m256d))
+
+let f1 = lambdaGen(2.0 , 2.221, xx)
+let f2 = lambdaGen(-1.226 , 3.5, xx)
+isAlignedCheck(f1(xx), alignOf(m256d))
+isAlignedCheck(f2(xx), alignOf(m256d))
+
+
+#-----------------------------------------------------------------------------
+
+type
+  MyAligned = object of RootObj
+    a{.align: 128.}: float
+
+
+var f: MyAligned
+isAlignedCheck(f.addr, MyAligned.alignOf)
+
+var fref = new(MyAligned)
+isAlignedCheck(fref, MyAligned.alignOf)
+
+var fs: seq[MyAligned]
+var fr: seq[RootRef]
+
+for i in 0..1000:
+  fs.add MyAligned()
+  isAlignedCheck(fs[^1].addr, MyAligned.alignOf)
+  fs[^1].a = i.float
+
+  fr.add new(MyAligned)
+  isAlignedCheck(fr[^1], MyAligned.alignOf)
+  ((ref MyAligned)fr[^1])[].a = i.float
+
+for i in 0..1000:
+  doAssert(fs[i].a == i.float)
+  doAssert(((ref MyAligned)fr[i]).a == i.float)
+
+
+proc lambdaTest2(a: MyAligned, z: ref MyAligned): auto =
+  var x1: MyAligned
+  x1.a = a.a + z.a
+  var x2: MyAligned
+  x2.a = a.a - z.a
+  let capturingLambda = proc(x: MyAligned): MyAligned =
+    var cc: MyAligned
+    var bb: MyAligned
+    isAlignedCheck(x1.addr, MyAligned.alignOf)
+    isAlignedCheck(x2.addr, MyAligned.alignOf)
+    isAlignedCheck(cc.addr, MyAligned.alignOf)
+    isAlignedCheck(bb.addr, MyAligned.alignOf)
+    isAlignedCheck(z, MyAligned.alignOf)
+
+    cc.a = x1.a + x1.a + z.a
+    bb.a = x2.a - z.a
+
+    isAlignedCheck(result.addr, MyAligned.alignOf)
+    result.a = cc.a + bb.a + x2.a
+
+  return capturingLambda
+
+
+let q1 = lambdaTest2(MyAligned(a: 1.0), (ref MyAligned)(a: 2.0))
+let q2 = lambdaTest2(MyAligned( a: -1.0), (ref MyAligned)(a: -2.0))
+
+isAlignedCheck(rawEnv(q1), MyAligned.alignOf)
+isAlignedCheck(rawEnv(q2), MyAligned.alignOf)
+discard q1(MyAligned(a: 1.0))
+discard q2(MyAligned(a: -1.0))
+
+
+#-----------------------------------------------------------------------------
+
+block:
+  var s: seq[seq[MyAligned]]
+  for len in 0..128:
+    s.add newSeq[MyAligned](len)
+    for i in 0..<len:
+      s[^1][i] = MyAligned(a: 1.0)
+
+    if len > 0:
+      isAlignedCheck(s[^1][0].addr, MyAligned.alignOf)
+
+echo "y"
diff --git a/tests/collections/thardalignmentconstraint.nim b/tests/collections/thardalignmentconstraint.nim
deleted file mode 100644
index e3a3081b9..000000000
--- a/tests/collections/thardalignmentconstraint.nim
+++ /dev/null
@@ -1,17 +0,0 @@
-discard """
-disabled: true
-"""
-
-# does not yet work
-
-{.passC: "-march=native".}
-
-type
-  m256d {.importc: "__m256d", header: "immintrin.h".} = object
-
-proc set1(x: float): m256d {.importc: "_mm256_set1_pd", header: "immintrin.h".}
-
-for _ in 1..1000:
-  var x = newSeq[m256d](1)
-  x[0] = set1(1.0) # test if operation causes segfault
-  doAssert (cast[uint](x[0].addr) and 31) == 0