From 0d95e2d3fc006d2b2439394f65b9eaa929a26de4 Mon Sep 17 00:00:00 2001 From: Abdullah Alperen <alperena@msu.edu> Date: Tue, 31 Jul 2018 18:00:48 -0400 Subject: [PATCH] SAI Changes, partially working --- PG-PuReMD/src/.lin_alg.c.swo | Bin 0 -> 61440 bytes PG-PuReMD/src/basic_comm.c | 40 +--- PG-PuReMD/src/charges.c | 2 + PG-PuReMD/src/driver.c | 2 +- PG-PuReMD/src/forces.c | 2 +- PG-PuReMD/src/init_md.c | 11 +- PG-PuReMD/src/lin_alg.c | 430 +++++++++++++++++------------------ 7 files changed, 214 insertions(+), 273 deletions(-) create mode 100644 PG-PuReMD/src/.lin_alg.c.swo diff --git a/PG-PuReMD/src/.lin_alg.c.swo b/PG-PuReMD/src/.lin_alg.c.swo new file mode 100644 index 0000000000000000000000000000000000000000..89ba9907eea5484d0b8ca15e58e0629f024eca49 GIT binary patch literal 61440 zcmeI537A|}nXrpZK>-<c5FM3E1EH7d?kvcbMLOw_1d~qaB#_XCvbw9fQ%NnIs_G;i zHc>{w(NPgcP{9FFa2Z_36&w*qL1zYVP(~321r%9D9nl|U-tR2;oO`RPyAp!J^H;%} z>N@xA`}dvY4$nMw*^1!Uo}&|d4oW17*9;8JJao^*^;;8(bbdTr&KA@1XsS;pyR|!4 ztY-7Gj+x3@;vDN?@2ZKx?27*0RZF_fUzPGmZ$4K{rSoGwBU5_Pdf3W=Ru1fZ4op-> zyN^3y?VQ=OW{Ts(+YSp}^5V5S?@!A`D+gLR(8_^U4zzNhl>@CDXyrgF2U<DsoXvsi z_<@OYiT!;~>CbyiO7@6-?`eKtX}&LxJ>T2>KGS?3V?40_>|@L?G~b_zJ%64tpETbm zm;kN&=Nt30&G+YG&-XRvdGkGUFa3vgzn?K5HQyhMJ%52QUuM4dGzqfqUueuvGT*O@ zJ>TD$m(BOPW6uvT=I5F3y-bE!{|_|gHvQ+vp8wpGPlvJhr`U6ooWw=udAxicH}0=! z@ch^2`400uUVe|6=NFjg@$!4rJl}4f$II^#^W4^tc=<hSo{t&#y|MCn$UM)P=kJR> zf6zQ%WS;L4%l}`Q=O>%zi(}6pFwZxe=kJO=|D}0u<Nsvr`7g}#bBz1#vGTd!JXd-) z?EF6V+&F7}wQ``91Falr<v=S3S~<|lfmRN*a-fw1tsHp9av+^aBt*9UgvisZ|6BR~ zx)&u9*T8z%2Y!u!`Xl%jTm~1yo8e8c0fyjoI2;axm%<*9fcp@BzXLbJ7vR%yIa~tg z!I`iLGB5;dp&u5&eCUQQ=!8EaXg>ir!qxB*xC*vF3C@5Ga5D76Y)C>E90>cuZxQAn zfrsH|a2wnT*TEOyGWZ~z2ixEca4IZ=*FqB7;Fa)qZ~(jzUI3p!k+2dDg;&8VVL#Xx z9zqFmGkhE_hqpr!3a|o>fdk+Plo_|c7vNvvJum@hzy>%OmO(ch3{O#je}?~p|AcGc zN;n&q!E51R6f8f7e~0VgI=Bqp2kYQO*b79j@FaW>{sZ0tbKvjbPGsKi!|iZ0d<@<T zCCI@>=z$J+HT)SF{R#L6d<iaq9nc4JpaTwvHaHBPq7MEF9)M4Rs*_&L5BTJY;}g|X zx?E0A2J?gVayFe0Iy-_wI-f6%v<H>kj%;aE8FvJoK{16XHag}e65`tao*xu?XU&`& zD8t^)pt>;|RMLg<d^RYjtGQB;s|2%V9vO7@61+G(Tm2jTtM-?er~p&hVzr#hR)z{) zT^opJU-trQGQMd;&(?H)ZbB^E&zTAPmwe|&+hYzk1@nT+WTl!dbT1grjSX~!_MOYo zQn@`C-=ssURMX|^P;SFqF`1{s#BU6^S1+O2Yz!x{OJ({_t<gRZWeu~kryRDkNA zNCJ5lD}$h>5DH=TyId>;T_#r~RzbJKFpRSb=FHySH7om9o)Dxn8FECKH5pZ9*M&7m z9g#>^v#Awnie-Cy(3h%=r^~7FbhVn4po{w7R3W`x{iUKDn9t~FexkCmy;9ATCaTn= z(0shiuBTD+AQ`kBnF-YYuP?SGy%4nbE<}EtfPAyUR6frRw9ovjJg*aD<6M_%Hebn- zX_Q5KBqD_l3Ro%^e{{rac3&))JeRvMlV<H?QdBCus08!p2Lo%DFAw8S8*NY`ZRC}g ztFA_f{n2`**E2gm8gzGeQ-0AFyGi9htPOj9N1NgFt#Eu(SC?zVHF0@;Q5SSBQmVb3 z(g3`sYP$s5Yfk)A84>fWZ6W;y%~X46U8qjPbc(v;%jYUptV!1DDn2|hvN>C=F&-%u zq(>->=Tez;HQl{nTdADS7_U~WT9#TQ7Q~h@qm2Ze!|d(L=CcKIu%f+J#+k}^zM}i+ zWYDR8c~2`Nr3vhZ&&_Y+ukQLIy^(gSAGqj;?wyfuaXh!Rq>||yx;zP8J0stsHy(M$ z)s5wBWn(FyNe0UMWKb=Pd-;*ij?(dzb7LF%8t3Umxu~5>wv1ETWFT`Uo6S%qPwCmm zvendhxipewK+`{+Rl2eQ&TX_SVn=0?vyw{&ln0@fs_8u8mnafdb;SEjc6efp0-lB0 znaY&LLU|Uc9`d{>0pqJSp-UWQ&>YR>tJyM5QtZk{Ked&lIys)LFdRosi6xzIB}->0 zy>igmS8L*W$Yf$`DVGT<+3Li&GR0wfoXBiX<%(Og_|tCur~J34bVU!t)IagRS}Lc< zvY|PZcQdgV$tIaDB_!r$0w@;)n<cK(aDR!lth520rSj9|lytobF_KTzO-0-UU6Qrx zDgQWks-KhAHdNR^C!Mzao7j{F*IvP8wj<~bW^3o#>9&nI@}DsmH(BdOBA3q=8CFeD z67oz_$SzJMso3;=R1rxB3j!4x<IM)^b&YGhl_=`iFmq7>(?`#=E~L^>U$WH8xzs56 zIhreGQo1#^2c`;jN^nV4#RxNLja5FlKqV;p7V1qll^M=gWNIO1TPme;wLMVt!X$Y` z>q%8R)X$Xi&P)t-U>!*!qN<Z}r+jh|F`h_7Lk}b|Iu<6=_^Owm<g>|gograua7n<W zE)9vdv!#aX^eh?P(zA6#io5BKc&*E6meiHmHm~_>e$d%&v$TT*jG5{pUdJEdUqWJG z?5AfX@k?i@q+X>_mzbaDww*}^c1j31(=9tRQ4xyd*Alv{HMv5rIF?#g94%2{WrkX3 zX4u;+F_Mo<zB3c^rI}n>+Pq90$?hIg1Lcvks53Td78XX-*o9N$Gp)^zn5tKy-lYpY zmErNB(7Z62N>@vT3bE;telFP1ldYz8CPu@NU-d#*>ZTg}X=cIFO~|}>R!mx$I)$oE zG~r)x7!9;XnyASCCm`f3MBWnl|4buqJ&MeK2YePj3EN;Eh%Vq)$n^KZCty2lgIVw( z^7*ge0k|5@hHW4+{IPHhyaWz_Tad$VhMV96&;#8dvihUQ(bvNHa1N}8bub&+;X!2S zd*Dv^5quo3f;BJ)o<xSe6K;g-;X1eiPKWid4i19bk)1Dx5g3LAa2&h>UJ8#PH~$Jg z3Kzhe;5?{60hYks$jmpvH{b@i94>)8EQZ&@0+<P%@Bni3ZSY<A4qOH!FbpTd;qWTB z3;FpD_yODm-+;G35k$@&gH><<><_<0e*PI;4r`$k{z#eL1$V#?;7jmD_!xW`Hi0U4 zQ|7z&CDmF!s%H8V)OmK|p(m%>R-K7=IbGaLL)H^izg@|L$OM?-MNLmSbhkx_vB_>= zru+#B%(ZKO_Bq6l4c*<Yfx20to+k}4ACiv6o7surth^L&Z3~PAEz)0G61SdIz%M-V zCb=qbns#Jns|~wlP8QCQm6;2ZM|cz&Hp6Kv6FfB~lg>iM&rm-i{bMBiD4d`j89x;D z!vfOMzDi;|JFp#-@M{Eot5va~p-_<s+xfoIqDaoU0%=;L&g75~n4T%M3W}LxB|x7u zGLc7`7g0EpzF75e!TBYS(>-gnSjee(q1YlnQ{_r+)`fYlWG{MBNueLmOsPb6(&asn z43*KfR%?Z+Qd()RP149r8GI+I*}!IolA;2M67rTjEj*np_N!fxE;7Z|Vi#2@SIzBI zJ{y;MV2;*to1KzNeuz`qoigjKiYg&Kx}t7Wv&2@s#imEgr2+~+)BPY1=^wP@M%@t& z3?agoOG$v&;-&wKv@lhceihM)YFJgHeoI%>)#dsD-Jv{l{Qz17Q<GheMq{MrT#-3{ zZPPIFrgZ!GmXNIXi+m>=fC+!sT<NInq+8YG&^!dii9%1bE0e@#(<2)LDc5WTsUsOo z6mu9CltmFUkaT+)N+sE%4_P>pt{|E;=&!{_|H`!smoH8REBaQiSv;5;SlQ1H)vR3~ zFkCLBGvbr>NQWUoxPod>^&5|p;+&5e(BiG*$eyE7hP6@Mm10~;gMM8Xe`D$>qlVJn z*0#4bp_aW8pdFdWXM$n$?nrLgOpr#i!Se!1M3Z2$w^0ut%$O0MD2U+XWuU2dE_d8t z61}iAhH`3>O%3sqiH@x^sEDkshgNU&*UeYFaP@u`=|&9Gc1<26#EN`;3DX5ZuG-Pk zb_fj3R@OkK(7jS5Nv36nQI}e+bzKeDKZ15QH$da7I})n5<he<3#pZmuRHAa}YOTis z2Jn$ceM?A!q{c2uQ&#kJE<w7Xku3X%U$LFF1{S=6tEu^8Dy%7EEA369=E~TGsN<uD z8Gmv_24~uXj^IeEA2Wrli^CPJSrswq@=Lc~Xib#KE*E?}P0fU5`aeqEbx4ER%*05x zeMVSRGf>Ykb(9iYHn2wjv3l)_IvtX!L1NSTVkUpdU<OK?rhQSxWg;NyP*b1wS+kKz zJDKZ^&2BgTcIpb;Ve78a4#c{uppE4^X$kpB`VgaeHS$E-ICO<&=kHQsf*|b=ukI1! zvtL;l^%jV#;kcuZopsDHvtD!jamOC#l%395EMjd(ss)9%WYjMcB?&(IfSicvvxi(I zElgcK?G#+qPt#FUl<R7)kZl*OK3NzfB+TAknSQ1TjB&gr|F`7-U6cE*E2&cQQLVtA z5wzt0mi#a4eNRU?wIXjz{%^_uc7uwYXM0L3H@Y_vf>aL`ADvtBznR>c*@HiMa5>tN z|2<Wv$OSF=-;VeGXpaC~G18HG%s=t=){_52O-f7tSK?+fny;4pZ{@kL_86&tdi{@; zu#q@h^1tbpoU|?mE%`r@IFibdWi43N|1UIR-8We0|2SL)ABGYf4sGyKI1p}U&Hq|B z2U4&E`rt$e;DzucYyEalz>WNUJ-iRz0V8lS$o~FUz(MdE*8d-dhv3`rE%*|A7EXcV zVGhW?{>Rx1@HMy|P6yd1@H$uwkFyUz_6FPz{|z6555qfP7}miIkp1}w!{5Tg`y~>; zfcL}2@E+)f``8O`FZ=**g||Z$ico-AupjINzh+Os9q<GACVUF6h4;Y@NWm#E0}g@b z!yniea0|%(f%9Q2oD9doi{QuX2lyPkA1;6$@CF!!ZuleX|F^-7@L9MD&V?0lB)kj` zhCi|=;32pHJ`QpR!kJKj0XPxl+`a6@-wS?6op=zw1F|1~E97AmRDCgZW%@5^X>La8 z{t|Piu~VCQX7WLyUgwjrW@p+z{O|B7sypZ9JlP+lK2BX14L<rye3Z?ZD96TSp;&9o zZ8%ytY=1?z6NzE1S89i+@+;hmXI<C&CHwvfbwRLMmLyE4Z>`t*J&eY4b2NTp*`<Ac z;MVeo#=SKRu@((phs+9!3c5p&6=F?3b%GAJ1`kPQ$wIf<->U7qe<R^z$@{I`)roy! zJZf+1csEf5UYM#hb|tdf;nR+-)J)l1N<PnW)v<mS*Jird8~3%<E}vOnbtN+1Fooj4 zv#6GGz2EVyw*FJJ(K7MZ@~x$4eG*ftQ=adlLf6Y~v6>xqDI$ehKv%7h7D!uAyQ5`^ zZNA<z-K`DW5fvMW+Duc6WzpEzsd>>TG*PMQcw4WIME#WvX;A*!*d;b$QzYNx0SS;; zo86jXrnddI(_A~emTY{ndrrij?SP{D#N~;~U$q1vzjRC`hIQ5;y&dPe+OmSk)@xQ< zwoK$kHnS&NEfjKhZ>gNCZY<apMeWY6Db_C1HPY3@<wlm_huPpAXi31W6-WJVv{oGJ z&0Lj42Gbik*laB45cI;A6-m5A>m^CEYU7qaxBB(lOv$?<TYx-2HiV&>1<B|$Z?#l+ zzOE{}2vZHFTkth08uER&>%PsGaYCwui-G$~%vD~oY_2w_nr&3Fyc%wx;>X;C*p!cJ z0-`#pvo}V@I=h!gJF!974OGq>wY_!f3QFga5C<;KJ6p?2qq6TqRXBE;KfOxlDhmzE z-!(*Tc|{amI1Ov1znU7cZTE(#l&bQOsu9~%W>)@L?AK=^)e@_z#Igk-%T_P7+Gu*| zitK;V!Pq8CiAkxcR<0ZQt~cVbQni{^TNqiHt!^}V9BiY*V6UKT{fLHb_k(WcoQgZ~ zV&WfGWq&)U?c>Ocdd<M}o~~l3<1ca!(c1Rwt+OVzsm@%>ixdIoT#Xm|Orw|*$?+Pq z<|>BrA>+sLrD2YwZf5Tb<;ZR!T?m`WT4jZQT%%EAhWHa{Ua}ZX*RSkryH*qDwtJw9 z6KiOAySwEmr?M>jY>S=xw2qFk&V+Crh^ETy^WxkY2R7=I(Jt2pyHD39SiWPuzB*1R zyN`6UX>OZcbV?BOu}!B{`KRi&ugINd^pJ+k`N*=~I$^d-q*g8OTf|nau)*rwueX!J zwpd9hbs&>vlM{y_vTXB`BW^l6=6qbht^2kG?!?-e(QCvLZMPtJb1^dlsm)xn{*Q>6 zcpfsH$p7~Kxm%I%zXNAM2F`%v;19_3Prz^BMz|i{3MI(FMmP=RJitNl0=Nsg{s#Cu zTnOjFQSb;d{M{h?|4)a7Am{X71up?P2k-!V1<ry~U==Kcd9XK#jQ<U|4!#0khBO=r z_wL7@c=!rj3ELnAeJ~Sxp$+8B{V$N+zYkxBe}m7%CGhtkXYQB4E8%~U*KdQ5z*TTD zyaz<KpM>qu2PcA@yKjS6!d+ytoV}NG0i%$GInWJq9^lKc0ZxNc;V`%tnf-Ej8(at% zKo#BqC&SS&2Y!pJ{u}r-oCEXVPn74AAbbADp#+Oz9vlIxyhFiVs-=9&rERHVHk-+2 zhUh*-kg#Wt<Y^&Y-W)d<Sv(;R<HI}Ms&`=Bj_}GV$KO-3oe=NMe5Q6UwB6X&$zd+f zAG*O=;YPMip)YFatM$Z;h>iSEkx@x5?vS{|22g+8^t>2|#KGm*uKGZ`tuXLz%;w5! zsP?u-4#;f;Y_#KPy2$?0u>lojTzJ9CmTj8|nXpV$m?2AP^h*}Y(?z9xJ{X-Sj;Iav zdL)ptY(B;kbeQb049nS(t#aVZof(#r6;nBD=1ey!v#3pN=IrTM)tjruI<%mzNNF)i zQ-?=$)yY6k@n=P>Hd6-gu(%v_*W+BRFUo#Udo-GDY_`S?G;*wWXWA)>wQja;;z(tY zn?elMyMX`+N=~tHu-qMTZStxBJEITy)XOqu9zWkco9OEQ8S8faegZw;5E(hVV8EnA z-6|m03;5}2?xKOebYAS@B?Km#{uKnOq+^wfLrC)9>OOTigQFXAr3gognOudZR&*87 zhal$%=5!=Sm~G`z{hxYlYLo5|rGd%JozwDRTl4<*4sUjDvKCEK!<uP`x<%+Co5d`L z(1%Ohg~86>ptCq!M(U6+<CRQ&R+E4UwrumoL;0Gmzvk$#qqT?9eU9eSavW5?JP!u5 zTPCt}M&;}l9`Y~l`4{K>Yg|h}i`>-#QibLCI9G0{dj^6|`D=J$)Vn^w-a4SOMrDW$ zD=s#jq&Q|G{T>N){ipp=+@am!*>-iJGw76aozAn7qE*0*Y)qGf&ULt4AG*m0o!J!U z_B%7#VtAz8IqqaDSc<zv)c2$|&!;(fzL`FtDkt{oc8cYVC?WEhwEQzHXSqg8&Wi!& zm1rtb96-sWHaT;ClOnk@U{i_<Vz^BpM;xm3j41+@BiE9b@`)X{_d6a6GI|!Dhqksm zRpm~e=MOm0pOEAkzwyvNuCFVg%5U9+hNtrNPpMpuPU{EuIJWC#o9qbH@L`7vh7!}o zOa8>0am@(BHe+O`{In;}B0V6jbx5w5k=c~GRmA+%CD_}kZ@C%Sm_?I@f<vqiY>*3x zxauIF;UN1Vm=6q`9;8(p<X)eF4Mtdu1f+TkKPb;Y2j(&>;!uKA%3zuCXq%E9&UWoD zF(LEnsiVb{dgiep_5Da<c|(u6w8+0+#Z+(!$9|^pfWFY?uJ*TyTb8W193_cg_9A;( z{7I*40pdt$p}1Km?o1+q#O)SfI)dP=vuf>;S`-FoZaad6jf88YdLBLf4Kn5)r_cw< zNu39}*nMS2B<JkodX{xG;!}BbLz11rON@IvhbMBJ-Z2FxvoU6<92L<sF;h9E<;z@6 zDHl{cU5#qCvAWEpL4;?Z8aKcO2ZKDdhAvAz{K}YPj%UpXLkg8~4K=}b`b9Q`r?NzW zrS&RYaS%`yCbA|Qo@S!a98AdRPxLP(IW9#$sFPVTiF0+$^kTaFP#Wo5T<V{aie!e! z|A!z4wj&dY{BQ5qzaLruZulv@6LRndm<xx%i{a16_J4w#;361<G#m{3!%fKe--h?Y z+hHLb4V`cV91bsn7r<l4`~M16sK7DM28Y4j$oZcHdmq4j{+<J}u75M!2p2#VDzF}2 z4R<5g%RK;Zg$k5m6?DRpurKTd_aoQe4L^l%zzuLI?0|9T1$)QdpOEE$40nR)0d9qV zh7ZA|a0$p-|LfpbI0oeIy@%jI_$vGZtcRmu2J8b*A@4s7cf!BJr{H674V(jKgRJ*2 zhxyP0-SBPX|FfVBTi`@^EgS*+fYl9r2c5v#a2C7<UJQG~wUqk@L6v`~52*b^ntl8u z%Rq>i>5-9%f;y2+ear^MQgKJNTtc&zpC}ZW#AI@ul$6?A$(FZfWfT)@%)r8Ds{i18 zvrL`PGU(fYoI2Eo>pDqmqI)V4Wa+|n$ELLudMdOb-~znO^yX&Vt$EhfW!0$G$7mR| zB&GDQ-f?Y4cWmjFOpxSoG&9Sp9Pi6iKebNRBxR#YGW{cYk==e2XO&31)t%aanf&;l zN;X~Qm?^zdX<|&ib&io!qZ9dj&=>Z(%1`mlcTJ=uYm|ahWo}8tHC+<A>$A4Ix(4P( zoXSLXa>ynaF+Tcuz4ypoA7lN}j-)Vr2{1OQhk?1A_J=&*r5&m4C5HYpJ!g?YDZI{5 zq9F+<o$<;?9++xb3EEjZC1<xWk(2&Z6`rc96`MmkZ=>~*gZaHK=T_)J%_dxw2veVl zj^s#qy`svC*sL>Xi^+{zGp($;p+uLGnzWgKB`!+H&{H|jbbXD<x`>G`h$#ns6}k$O zluoTDqp20`0IA}$Q!dtvZ7pBb9BLI~xgWs~VckyH*TomgHMYa>4D=k`uV%UmMoW*? zO_d+@<}k6sY#Yv!uhR6mN+F-l?a-P4s$;B>nK6`eX|7VMnBrC;Fc=5sL&a7rcEcss z&R8|7aEqB#ST2R&I*5ws7c!A5>bVs!aBuH(2$qS8PFYSE-_;i}jjYf5Q((2Tn^h#= zK3;}*aj0gb*H`3LKz`EQ*(OGC+1yrUFkUa^TECi3i{7Dofx1X3g`n+)Bu^;h88$X3 z*)XaM&88+~Y*9$G{1i$8-MJ&bg3Hwotchh96y`|pkumc@<AYL~B8hVpT4_x&Hjz$< z4XcB|6DtdMFEECqY*Co2Y^lW_--Z~ZD71}p`kSviLEB(<T%V1XK1ycNR#{VtWvxs( zrTwb@p20=eA&Ibxk0^re#tv>_GdDM)d=YJbaYr)X)Ng9ln!(hX^{eC>0%paS*hWmg zri(?Esknv3M1&!XnrB0kp<Ugu$~vWp`q(phhN4<aw@MXF1SL<KZak6!EIqU4rlen6 z*QBa53C@&?S(i@NGrGSd{U(XJzq7LdJq^&$^kN4csr=_%Lu*$%ln1^gUBu$IUxnLa zzNv|V2qSvUB<_oTtgBvFJEv%T-nyntH@vpREK4@ukVnIl0eyp8I@I@2NJJ??QCmov z*_xy#1!_uBv7;S(cgAu>WLW8Exgt?dUUs6iq;PdqNG?h!YKe-fEK2A_C2uGb%MI(K z%J1o_WpB{uS4+Y);Z2q%RCug-Rs;EwDX9j%F}+pa%vir#N=_x@pLoBn3|+Nye~IPj zFER1TdWwmVT3nO<j{bv;PvuZ%ifj9El}k)y8-T7!5hvn_o{=W3JNXtnu`W~ftV2`A z$A$00){_FQjHn$JQ8Ae7rTu4hW@g$$?U{G|RCHMN6%1xFT*#)2(zk8X6-YW7=}Nh9 zMQ(GJ%FLP>`E9!!w@*Q(*Q<W?=aPTYOsEvPHYOX_ZC9!da^2tX(J;HM3Z~&2X{coi zXM|FGG*<~#wN*bI>^?|mCIhL)F6l}~O0rDbj>sKkOF}$!sZn*axWg{uGD)=Z|Gj%? z`Je7teQrm-moosLhtI(l5cz))UIF{U#mM*>7=(T}0gi_sA<xTN{|DhBn1C@@23_!S zkbVC5z<<Ek;Pdb~cq5z$uZ08Pe&qZg!^hxkD1of~_rU$g`1iq=;2+_Q@G{s3Zbhz_ zb$_|@uODW^5wH*Z7I|J|{wv{Ka0<+Yn~~K&2Umcs>0bnL@84VDWY`OCMOK%+`u_qS zh6~|bI0H7ozVH*|@>}5Ra4lR07lQ2Fe?9cWB6t+}{J-IQ@JYBBWUXKB&=+}qJsbf0 z!;g^BZ-ranDj0&(;pL#p&XildFR26a@n*VKw4$4cv_fm{Is+O0qXIq`Y)s^<O3g)g z&!{enDWwk#15lGo9jL0$YAw>-^V=S@EzinDMiq41_{HjXenJ+=&}S9xA_%J=EP*fu zu~P~0jHs-VE7#e(TF-qXP*>-TOFa=$5h27vHprIC#9b5^+3h*5W9!w8#l|`&kB7#w zKg4Z26R1=BmM`mP+O%q|Oq@)I6xxl-_MtI<Sb>3doFPWsA}>R9g9Nf!2(7D3P7z$` z36);3hf&qe)k2@60oDe>_}?e9DAClg&LaB{m{jyJR8k-kfa+zGszKJe=hg{>U14U4 z*~NVgap!i{v69Rq%>94CkvTiXK(Ui9j-e`IUtwBR9m-r*x%~LM(ML%J>hS_~ov=+E z^<a@&E|C#MvMY>dZ7JeLt;^}!l(1eXU*`wQ`&RWWdfnnwW{d^xcJnhexbn2rioTOq z4km-6298qqX@?clU*6Meb*|dPCk{3+n@{=;0?D?8S|~Judy^t@AQ5Gg9!r6)4YDMN znWu@7$C!A>Q)K&tXF<mQ70v^{GBdg`Se$j%LYeNaJL{|zl9baPk+-W$c|F1eCGOP@ zx2kb5pjs2b5uPt3<DF_s)z@y@pjqWZk}Ytptjz(6V8~yUl(n$P#l6wyD%LlP9qy=g ziEtG|&m=_chbSldy%jQf5LKQZB9EurF1^C9cz!0yIT}&eX<enNU(B>cpJ`^!HZ^9N z9L>QIMru7jF&psNzC-rm#yZcl_m5T}%X`xGxp>FK_GrFHcGj9SHkBuSg&kt`m{iJE z9<4xP$ynX%W;91UMhwE%CcY&9WsgAv3$9!w6#H>yWKHB-HRSa}Q-!7u#du=BbzbYC zE_~vsi(mejrzhPqvC%3poe(A8^Ej%ui27J_*n<Y(%`qc73yD*0_Sc%!w`Wr{Wq~LW zRWD&mDfDbo#Sk~8Sau=%<GxVHauO+)Jrd1m7Kw?U>G-3k306k-^ISCUx=2)T>?71O z=(rs;FVmEalX)U;>#N`yz-g;y5*$jhQgv&F1Sw$JBNDC*9cracDrmZh{4DF3GO|Wi zF7UXL<u-k<jZ2o)mV1Mem4cMI>i1>hK{S{B92NzYs@4Pbt1wAG;Ublu+xx6~F`Z`x zSgjJO<rgouE+89-5yGT9Fr6oVx@Lm^zQVv;=2b~FeO@GI{j@jH@ea@pMLJ)dB=wda z)SHD;c~aH6f$b}5mW2|MdGd*tK=hY9@9Of0Kr!J{YfVgTcP?9Ih<+V+E)&JRd{nQ- zAfy)a6UxZ@M_1YvQu47=no*yWFY9Euqh`yE?A8xwO_!QC+|jg<7&rT`kZGI%ayH9> zmMmLIb(N>u&{FiOoVo#brp(~m50~n17w=Tn;$%*TDg|j24NE?nq0wKGmRb`rT^Ei9 zW~oVXKr+jl(}+F$k^1p$S!*<t4l7KX1lTEvjj?>YGL&ES#v0Y%xV1wx9g)#T&qAhH zHmuFvS`!I-ooo!UOL3cJV9|Lk^8Y-9#>b5OKWYTXTafQBfOB9z{0_PPL6E!u<=+2y zfZYAR8kWHvcopmga{vEN;fL@)a0gri7sGo%-uYL8H^3>d3Pdl^0qt-na=+{a5PiUh z;Bt5;6k!aO!6FFY6>tc=3_gQS;Ppl~An)k=EqZ{Tzzy&=I3J3Tf#aYD+Ckpc_j1?+ zevf`Y_6>Xl<otgg+TlQuI{^L>#^5!uC&)YZ<b423;ZXP%^8bfm2gv*M<Q;k+fq#HJ z91p)kzL$6DeFla=_VMotw;<<#0Qy1Rf%i3dFUX$#zXerB^#uH#Fq68_=#$BgrjhJV zNHnrw&FdV8vXA`l)p|?eigfkVEbr!<5x#7vd%;q>yD%9{CIdD`+U1>a?a=qg|647z zvwD_VwP^Z?+LK$}VCef5{o9M)avLq*Ol{6yzVfui$c3YF%P4zhdF9UBMAVylznIab zP;+d}@-mvOS+?-9-7muyGUid$+(;_i=hzX10VbR9R^`RA<!f2J=^Jb^L#&&oo%D8U zCySP@Tuwku<a(3ULlgy)g;EvTUBamx<^?ad5O%I^^e^jMGG#QQow8qC6;30vH9)=K zrdlrLyBCZUbh*j?=xj<<E-Cer7dK4%?o~#u8Dz)w%H^jnPAy-2>f+_8)r*!c?kBe! z*VbwBtA2;>hdYf2)@prSRl+`Q^ljA_PR+Bn$O3C=tf(fOWDGlbGc0hkB8NhagW;l{ zmu0#_wz{#D=?T`T!)}-r$9j6|>Z@X5|9{>Sh22G@SxnO@HP;i_=HKcvbd^*2@$Bg` z>U9&f{nd2+l<eHE(@m+gE?<1o8rKy$ze7Kjno9R?`e`_AbsdrWOLCFXon-({Z;Vg2 z%UI5cUboA3fhnP9yGT5fM&i-WlsUc0)SWL9*TBkw#dPw<F~UKy#F1Io5y|R=8T5O5 z>*-}NHQH;dm6eW*BNLt{^ky;~<CoUay7=p(3$hduP6XtV2>XsiFAF7q^aGPDUOLM= z#Mqn?HMY*t9CDPzJR>6`zscK#o-zdL?RE8{Y~Rj?!-j7`)Da429VRq4^$27iu*BSz z^LD+3Vp6_3{nwbyRTg#DXhGFm;Z^zzIemUOtHQj*_waGvzF0^HjH61hRP&&TeAY~a zu$LR9?+;QdS1le`u}Z5&C)nOJR#a=8XnFhtE%D58B``pkeojT_8rojOwVQL)c{rK0 z8Dif4H_hAZs1zLKT4#G?{d0Zvr_v9Y$)mO)CX<+`46RpZV>PyBr=Zf;o8<)Mi7P($ z7o}>Hzgokt%TG+@qqP?Go~$<-`KW!RxDYjbLOJIjT@&P21GPnT&#g{63V)kB%B9M` zh)T(uW{X0jFwVixaL=4p*4yo7>HuspyPe9yxllb5p)W{?&+>`pMYv`e5sy&|A|I=D z2w5ia_cv(WXKsUc$%*({-ye0d#dqzC;8SN>-9XKD=BSVM#on9UtTYlEbGj$uq1mOO z*8kg(ABT<nzs$&z7b5e^T7MV(4LpuqFZ=w@hfN@N{Jjk1JpUkw{Qd`IaJjGVB3KGX zz%P-*zX><OTKF?E_CLX9I0<e+uD%4ez-e$Cya>LFZ2cK{E698AWuLw5uP?)iZ~*)e z`C8WCH^EX!fULb=3X4JBrMCwp;A-USl`sSLfVUxIuZASZUijZ2U&}l4#^5-Rb@f9) z&dEPY-aG<7fDgchpz_P))zn{7IpQBR5<5ZRnIH8Bmn~V!u%C!Wq=DoRTZQsnHE?0A z?4*ogyR^T6Y%#4&F-^*%-G)uC|2>Td*6L1Xk#+VL%qkn?v{_~?>XbL3WzotWqeNk( ztvAt=+3TkCNNG5ik`6q4Uyco|-nVDplbMpfRsr!ToZ)mY=;!EtYI(A@MZ|rdzSyU? zQ4_A7OtAvaE2g}M%exma@C;S##<6%@{NELsW<}kakx4FUkiD!8H0i!4Zm{-E{l9Cb zbkQIq8`pey@xP}JNzMJ44j8=}ll(P<YZsdte%w3%H`_VYoUJ^;+p7A{IJ0}!*{S6T z-$wnvRVVzM)t>FfUZgTHZZ?|7q#wP-NX?*nMb?Pwb;$Js@AXrUcR|OavcDNcuCn;o z7ST`cTeNZ^`{LyVef5TGTg9ds1E%jQrt!drvm1;5*`mvP8p?XFQ$mDS3kseQ-75RH zo>iKAuJ|f&s5iA$sZJUVSt>1FLU^+`2|1~6`D%`7sPu@`l(2Q8@%QxAjw?k`b*R%D z-3tiUiTh4UY3%>>y}K0$J>`HGpd?JzPeoPGI28M&)p4thqHBw-j=Qd&Gg42Sl(l40 ztLIk3wZ94@>;Er7fL(%2E9d`DGQ#Y2$oyBrc326A!BfclUxiDd459-#0A!v299RuK z@I~Z)*~h;gWFNn*(?5*tFMIgk30as22f{<>0Iq?zKnl9ykI4Vuf)czG9zg#8Cai#i z;ZbD&@52}1?I3IKJ+L?2i4I@|91M>l_g@ch1G!W0DdhgEpdVfiayDMh#GeF*!ai_6 z^8WQO2`P9D><{OVKPr#v3;VXpHTvF-!*#3CPv7WUv$bb@<#nfKYg0^jSks721d>Yd zO9pFKty(<jtA!giuE^YFt=%=?`MOrko0Na8ts9MVktct7>7&)1P2Zih<}lCvP7+)9 zn^0o5`kMxQ$TPN8tQq;W`F#itV(%?oHnLH>Z=sRR=azQMbug_xYtOEkQ~j-A(@b)j zn>0PQ)hMklA<?Ms3Ab`JSYY$Fv5NeU2%mV|$p2*{-~JGJ|7xhhiLejcgT?nj1&)TF zAnShx{t4a-C&1mv`m%TbUGQ@F88ZG?;R<*Yi0uEr$oSs|k^8TP_rf_K_xH*E{5;71 z0C_Lp&B*^(zzE3seYvmyFn9tzfb7S=8U6*%gDeb!oaa9p=D=(?6!wS5&=1@Lcf;pF z&i0=UqC4n@Bpd|yqdT}8{u5*`{~JN}^H-n@%Ruh!I}~0G|ATJf4v@3{vd{l~cr%;= zs~`zo@KSgld>7rrHE;#|Gn@~n!x8W*cqRN9UBnY`4_pOfFbc<myp!((AZG(E26-3X z66k|h!5=8g-+(I9T78978`pX=L+U7qCoN@WV>C3=+uu^Iu~UxN(du8lRA`Q6%Ari9 z(zVAg<p2*$NReH7Y_JnY>P{=YW6)jN<YE|^Zt2Lez8P8k)X~ATGa^iyedNU6Kat<t zStqge{Nn{g4gCunpGnuGJNM2aS~o2(+!5{XJ@P^h<IH>UB*WF3t@nJ3%HmmGF?bJc z{u6EGn5cI9N!O13G9;d?t>!XvpRk$ZPFHMp*PiWX){b+xS2ndK#{u%O)H2TPbEu=r z=>_gJPX;r%Fo@-hRFw<K>;R<ZOLi})xgK8*%&q8KEemM_EBj@EmrZ{hFXP@m&O|I? z@BZS!lU5F{=o?tHxPwhRdgq{xgM8Z?thjKVyR5iVa%8p*u+g<bUe?Lw<AbLzUNqZX zd6f`fSw^mwCr??tB@&;dealZuEnl{J4N)<+)WJWw<xI}OX61#AGdsvz^-?-*NisPV zJN>dWzA7d0V4EWAH(nCeHP9(Jz*$LcEXlQ9=tl;)6=J)(<H66VrF|SRShTjkuS4#c zviaA)c;VV5Ca#U{{ld#w1~kgjXC!J~a$O@g)rk5WxeH&iTt4oxU(0y=SgEQjTx1WT z-Zl~%>3Y5!>S98oqFxoE_m0+FQ7Fc8MPbowSM2STS0_$)3xTy=-U+*Mk6knVswl_u zgYjSPhj7h^){!^&Psvr5w!xbFB;;xdYgDAVDlc^g$a@INNiJsTWYM*=x05^c)zdnc z`RiuNPTo{0k2CqBnB6v4d#0K&{X=C!Zo)5>Id!uIe}_}@Hy3bBrYLCNF7KpFat3;Q zBX{KwXRB%bOg3b59X?m#vrI~>QR&u?n%Ga<rkd>t*G>7Gz7uDvC<c7gE)%^YyIRV# zH(N#9Zq}w%X$z5U;ogF#e##PSIeb9?RYjZ9I~d<1eh#}+V79bmU%%TuZcUv%6H4Qo z^Nod^y0Vv3I_C8XCVc7Fk_kzI%8f-!n%_^3$FiNP(FS+Bh1#`w_m||Zd>W`9TiX$J z_W1wSW}g34{%(+&acjGAY3ez-lSFSxG>ZeK4#yAv?aG$KAZTJ!B7(d(HBp-q*Curz z8ZW2l@9B%*pX5i~`oC&IZ?H9yC6uPOCi13{*ycoB+iml@JNBbc`?u?IYoA$@sWv9c zdQbD)Nty{wO&)e-b0VXLobG?tZZj15{}qhsCm=V<`u}-GUi=L5{-@z0csp!_VMv4A z1Go_8fxHXgx99>MfrsI8xD3vPNmvUjVE_(?HjsA%{2pDw&)}QzHMkV2Py~55;GS?7 z`hy?Cm*FaqI{>!8W{~#;K8P;iKDZYyhs)qRI1S`ofv<)`;BVj{xC4E{ZEz8Yeqj^H z`TwKfAow$Sg?|G%kMM4I7gXUS*dOHXz`fxvbP2K_;S+EnoC|V);P25LJPcREN8ube z73RYcuqQl$zChju_)#c95thS#@GW!$a`*qmun-<W4{$I19DW9$f_K21LGBHZcmH)j zJA5C#!0qr^xCY(`TVVpKFbGQ_fF~&HN8n+2KU@mipaAlYz`3C6gQ*J*zHCd3c12!? zGHxVT=8ro>22WTQuEjOh8HAQaYY;z|=WdP-gHsbInSU@ZO9otO9JW@r%5mmVNm@1n z(I`vgh-2tg_W~Pp7iDqTNDbo4=uIUpOj|S_H4#v6Kz>eUov(SAD(ifE&19YUC7+$q zCMA>${)y&xM6F~CJX#Gx6-`4mP;9j@qFT~^sxtISHq_iS^fQdF#Czw|OX8v?EKw}Y z*TRx0nIhrVDw*nfA?hXT1%KD^!Y@eif_P6Tbx}m?l#oaEv)yRDcmuPN<Z2>kb$!uR zsGJdr&{;e}wfaj5(M4#x)qjcc&Qv-rhDtP1E%>*2nFmB9_8?3b60{b0#Wjki!Z?4p zbZL9ay|u>=ZBKGGAyJi&2-bb>J;XC~$SB{GK#WF7EZ7$p$=|Ej^3pgZ@OvjHqYc3& zi!AkOkInvi#qCF4;^lIno*(fNs?|<Dv&BH!=ZbPU1vgV><dg_EUGd%;?vvUgb-~wi zNn+%)^SVxPwHtIyo_0)pv>z+z*U@>3m8YTi6z9$9KE-AO)n=sHL}VJL<`n1tXPP86 z(q6`bifJ+v5`wNxPC}T)>nm7~s3Y7JI3%yNL}vH7Ul&|6nWi>`T1eBzbojfWGDu<? zHhI%iB!lUieL}o(iD}f{>yLJ}7jT_bqqbguWQ+F3e#taiDRS}idQ%suXIQUY>nokz zyHK<aQFq#_PZ?a(9_kmx%oLchq&nc3CCrSaIt8Khl{1{ea=MbJDF{4}XwYdAY<sv6 z<n`4gTr3fdh$R=TCbQIp<mI08*)t?dXLn12q>mg{y`w}LMYi0$t=1&Wic7oK5X&T7 zwAhJg0zDhH8Zko3ileSg(?wl*w@dSXG|Y|W|E?i4TKe<{x9R5m(<WiMEKzmtua_kv z|1U(qTZqgj^8cg}_I`p~FYf>lJwO3E;81uq$UFaj4D!yucf$lMfrDT_*cYxve!mLd z4R3|Fz!1!Y17UBt2O0itFb6u|aFBQY{RUZG&I^1OZU8wm@b@6^|Lcc6AOUi>-|cWE zTmmB3p8@OO7)XMgC3p-Tg==9mY=Wh*07S0e6P`qV|9|jba1-QU1cu=x=!ZqH5FSU4 z{|KB7CxXcJ3HTi{{P*CC@Im+hycssbYB(N_gC~*gZ-Q^br{PK%f;F%jj)eo@`EVPu zz3dnGCfo=+U=rrR{mAn7!Oid?xD;fMz#Bl`9k33bqFny~@}9rVunAU!DsNNPPwz{b zynK3ln^-;-i?s`*em@bNJh+>L)KaN9mrpZu0~R?K_bqpeyx!E<_UL}&6^&&R^8{II zRU?EMeLI+?>(PUCU{-~!+VR;pk1!D?nX9;gh*$OGQ~f0tn7zy|M>1bl>C9VjSx$4a zH5~93g2>3}dpf}2!ALfp<Ajvssru%q)f2ez;}P%@)}EP3)-aO1a8ds9o^J8HmT2r! zR{P7yCMmi$?1ovJH-&FzUfx7SZEk*E+pt)Zte<gVYiiVzWRJj9%|{iVUJUm_7<c<@ zWZ~O;vSqGuA&cCed0CtHcFKabXKvr^-rE~lX;$mZ?oI7^C05s2P?lS?;%mfqJz0mS z){6bhq>v8OE9%9i(jxjdaqXtXUEj;U=GyLVUDi$L8I`#jt34<8Pjv8)S$$XK?j~Ke z*O&~Q);Bn?Y+%WTpkFp8Em|Ui8gHH$%gQU~M$<W7xE{10$vckZHjoZ3xMF2r0<mG( zzQ}99Ie8QJRPr9{^0A1yC<c7Is%RBXk@{H)C$VB%hb(_Jq0km<@?p*23uQP};eF0h z@jOF}!fC8cr8reNL4%QU)P1z4{tn$I1GSnTcHPOqDUuqdRuUF%iYEQK*+}Tkq}JJ! z>jSl7#2+`2dfd<{BG5eEoL<VC?Gi3_?l18%Oj`E>t38aAN_F>_3On24rGnA!ER`k- zMbcHLP<%mU=L$uQa*^_w>6ZsWNmw2U$#_=`?k_R-3dED}JkwCI-lg{GNJ&Su$JUjr zU3I-xQc{h#m>O^StB$vIl+rTHj2l-tiM-#V*({V8%BCYpOiZyWlPrcKp11@lt11Cz z3MRsQuN}G6M%#pVDW7QW#&&SH_%&U8XPp(9trD*Wt<IlB>8MPr1C49F#My3`J=xIf zs9OjTRkz8->GfXLCs!zu;<qEyN1R)4NB@#c8Ob+V1)qG}{DRcoH$E(5mN0ygu8t Xq^KV#YseC-)jlC8_l)oC+Q$C@>fDrh literal 0 HcmV?d00001 diff --git a/PG-PuReMD/src/basic_comm.c b/PG-PuReMD/src/basic_comm.c index c47d6fdc..d22009ff 100644 --- a/PG-PuReMD/src/basic_comm.c +++ b/PG-PuReMD/src/basic_comm.c @@ -42,7 +42,7 @@ static void int_packer( void *dummy, mpi_out_data *out_buf ) for ( i = 0; i < out_buf->cnt; ++i ) { - out[i] = buf[ out_buf->index[i] ]; + out[i] += buf[ out_buf->index[i] ]; } } @@ -100,7 +100,7 @@ static void int_unpacker( void *dummy_in, void *dummy_buf, mpi_out_data *out_buf for ( i = 0; i < out_buf->cnt; ++i ) { - buf[ out_buf->index[i] ] += in[i]; + buf[ out_buf->index[i] ] = in[i]; } } @@ -267,43 +267,21 @@ void Dist( const reax_system * const system, mpi_datatypes * const mpi_data, #if defined(DEBUG) fprintf( stderr, "p%d dist: entered\n", system->my_rank ); #endif - fprintf( stdout, "p%d dist: entered\n", system->my_rank ); - fflush(stdout); - comm = mpi_data->comm_mesh3D; out_bufs = mpi_data->out_buffers; pack = Get_Packer( buf_type ); - fprintf( stdout, "Packer Gottten\n" ); - fflush(stdout); - - /*for(d = 0; d < 4101; d++) - if(*(int * )(buf + 4001) != -1) - fprintf(stdout,"%d ", *(int * )(buf + d)); - fprintf(stdout, "\n");*/ - for ( d = 0; d < 3; ++d ) { - fprintf( stdout, "Iteration %d\n", d ); - fflush(stdout); - /* initiate recvs */ nbr1 = &system->my_nbrs[2 * d]; if ( nbr1->atoms_cnt ) { - fprintf( stdout, "Irecv %d\n", d ); - fflush(stdout); - fprintf(stdout,"%d %d\n",nbr1->atoms_str, nbr1->atoms_cnt); - fprintf(stdout,"%d\n", *(int *)Get_Buffer_Offset( buf, nbr1->atoms_str + nbr1->atoms_cnt - 1, buf_type )); - fprintf(stdout,"%d %d\n", nbr1->rank, 2*d + 1); - fflush(stdout); MPI_Irecv( Get_Buffer_Offset( buf, nbr1->atoms_str, buf_type ), nbr1->atoms_cnt, type, nbr1->rank, 2 * d + 1, comm, &req1 ); } - fprintf( stdout, "A %d\n", d ); - fflush(stdout); nbr2 = &system->my_nbrs[2 * d + 1]; if ( nbr2->atoms_cnt ) @@ -311,8 +289,6 @@ void Dist( const reax_system * const system, mpi_datatypes * const mpi_data, MPI_Irecv( Get_Buffer_Offset( buf, nbr2->atoms_str, buf_type ), nbr2->atoms_cnt, type, nbr2->rank, 2 * d, comm, &req2 ); } - fprintf( stdout, "B %d\n", d ); - fflush(stdout); /* send both messages in dimension d */ if ( out_bufs[2 * d].cnt ) @@ -321,8 +297,6 @@ void Dist( const reax_system * const system, mpi_datatypes * const mpi_data, MPI_Send( out_bufs[2 * d].out_atoms, out_bufs[2 * d].cnt, type, nbr1->rank, 2 * d, comm ); } - fprintf( stdout, "C %d\n", d ); - fflush(stdout); if ( out_bufs[2 * d + 1].cnt ) { @@ -330,8 +304,6 @@ void Dist( const reax_system * const system, mpi_datatypes * const mpi_data, MPI_Send( out_bufs[2 * d + 1].out_atoms, out_bufs[2 * d + 1].cnt, type, nbr2->rank, 2 * d + 1, comm ); } - fprintf( stdout, "D %d\n", d ); - fflush(stdout); if( nbr1->atoms_cnt ) { @@ -343,8 +315,6 @@ void Dist( const reax_system * const system, mpi_datatypes * const mpi_data, } } - fprintf( stdout, "Out of For Loop\n" ); - fflush(stdout); #if defined(DEBUG) fprintf( stderr, "p%d dist: done\n", system->my_rank ); @@ -355,9 +325,6 @@ void Dist( const reax_system * const system, mpi_datatypes * const mpi_data, void Coll( const reax_system * const system, mpi_datatypes * const mpi_data, void *buf, int buf_type, MPI_Datatype type ) { - fprintf(stdout, "Coll\n"); - fflush(stdout); - int d; mpi_out_data *out_bufs; MPI_Comm comm; @@ -376,9 +343,6 @@ void Coll( const reax_system * const system, mpi_datatypes * const mpi_data, for ( d = 2; d >= 0; --d ) { - fprintf(stdout, "loop %d\n",d); - fflush(stdout); - /* initiate recvs */ nbr1 = &system->my_nbrs[2 * d]; diff --git a/PG-PuReMD/src/charges.c b/PG-PuReMD/src/charges.c index b9c8cccb..bda01bb3 100644 --- a/PG-PuReMD/src/charges.c +++ b/PG-PuReMD/src/charges.c @@ -843,6 +843,8 @@ static void QEq( reax_system * const system, control_params * const control, exit( INVALID_INPUT ); break; } + + #if defined(LOG_PERFORMANCE) if ( system->my_rank == MASTER_NODE ) diff --git a/PG-PuReMD/src/driver.c b/PG-PuReMD/src/driver.c index 2040fa45..e820c2b3 100644 --- a/PG-PuReMD/src/driver.c +++ b/PG-PuReMD/src/driver.c @@ -65,6 +65,6 @@ int main( int argc, char* argv[] ) { MPI_Finalize( ); } - + return (ret == PUREMD_SUCCESS) ? 0 : (-1); } diff --git a/PG-PuReMD/src/forces.c b/PG-PuReMD/src/forces.c index 0e18c533..67aa0995 100644 --- a/PG-PuReMD/src/forces.c +++ b/PG-PuReMD/src/forces.c @@ -1140,7 +1140,7 @@ int Compute_Forces( reax_system * const system, control_params * const control, Print_Force_Files( system, control, data, workspace, lists, out_control, mpi_data ); #endif } - + return ret; } diff --git a/PG-PuReMD/src/init_md.c b/PG-PuReMD/src/init_md.c index 1364ab2f..f5b8d2bb 100644 --- a/PG-PuReMD/src/init_md.c +++ b/PG-PuReMD/src/init_md.c @@ -942,8 +942,8 @@ static void Finalize_MPI_Datatypes( mpi_datatypes * const mpi_data ) Deallocate_MPI_Buffers( mpi_data ); - ret = MPI_Type_free( &mpi_data->mpi_atom_type ); - Check_MPI_Error( ret, "Finalize_MPI_Datatypes::mpi_data->mpi_atom_type" ); + //ret = MPI_Type_free( &mpi_data->mpi_atom_type ); + //Check_MPI_Error( ret, "Finalize_MPI_Datatypes::mpi_data->mpi_atom_type" ); ret = MPI_Type_free( &mpi_data->boundary_atom_type ); Check_MPI_Error( ret, "Finalize_MPI_Datatypes::mpi_data->boundary_atom_type" ); ret = MPI_Type_free( &mpi_data->mpi_rvec ); @@ -963,23 +963,18 @@ void Finalize( reax_system * const system, control_params * const control, output_controls * const out_control, mpi_datatypes * const mpi_data, const int output_enabled ) { + if ( control->tabulate ) { Finalize_LR_Lookup_Table( system, control, workspace, mpi_data ); } - if ( output_enabled == TRUE ) { Finalize_Output_Files( system, control, out_control ); } - Finalize_Lists( control, lists ); - Finalize_Workspace( system, control, workspace ); - Finalize_Simulation_Data( system, control, data, out_control ); - Finalize_System( system, control, data ); - Finalize_MPI_Datatypes( mpi_data ); } diff --git a/PG-PuReMD/src/lin_alg.c b/PG-PuReMD/src/lin_alg.c index 3eb6cc58..420286db 100644 --- a/PG-PuReMD/src/lin_alg.c +++ b/PG-PuReMD/src/lin_alg.c @@ -254,7 +254,9 @@ void setup_sparse_approx_inverse( const reax_system * const system, storage * co bucketlist = NULL; samplelist = NULL; - for(i = 0; i < system->n; i++) + fprintf(stdout,"bigN = %d\n",system->bigN); + + /*for(i = 0; i < system->n; i++) { fprintf(stdout, "\n%d\n",i); for(pj = A->start[i]; pj < A->end[i]; pj++) @@ -281,7 +283,7 @@ void setup_sparse_approx_inverse( const reax_system * const system, storage * co m = 0; for( i = 0; i < A->n; ++i ) - { + {//TODO: add if m += A->end[i] - A->start[i]; } @@ -292,7 +294,10 @@ void setup_sparse_approx_inverse( const reax_system * const system, storage * co { for( pj = A->start[i]; pj < A->end[i]; ++pj ) { - local_entries[m++] = A->entries[pj].val; + if(A->entries[pj].j < system->bigN) + { + local_entries[m++] = A->entries[pj].val; + } } } @@ -586,7 +591,8 @@ void setup_sparse_approx_inverse( const reax_system * const system, storage * co for ( pj = A->start[i]; pj < A->end[i]; ++pj ) { - if ( ( A->entries[pj].val >= threshold ) || ( A->entries[pj].j == i ) ) + if ( ( ( A->entries[pj].val >= threshold ) || ( A->entries[pj].j == i ) ) + && A->entries[pj].j < system->bigN ) { A_spar_patt->entries[size].val = A->entries[pj].val; A_spar_patt->entries[size].j = A->entries[pj].j; @@ -639,66 +645,44 @@ int sparse_approx_inverse(const reax_system * const system, storage * const work start = Get_Time( ); - if ( A_app_inv == NULL ) { - fprintf(stdout, "1 %d %d %d\n",A->n, A->n_max, A->m); - fflush(stdout); Allocate_Matrix(A_app_inv, A_spar_patt->n, A_spar_patt->n_max, A_spar_patt->m ); } else if ( (A_app_inv->m) < (A_spar_patt->m) ) { - fprintf(stdout, "2 %d %d %d\n",A_spar_patt->n, A_spar_patt->n_max, A_spar_patt->m); - fflush(stdout); Reallocate_Matrix( A_app_inv, A_spar_patt->n, A_spar_patt->n_max, A_spar_patt->m ); } - /*for(i = 0; i < system->n; i++) - { - fprintf(stdout, "\n%d\n",i); - for(pj = A_spar_patt->start[i]; pj < A_spar_patt->end[i]; pj++) - { - fprintf(stdout, "%d %.2lf ---",A_spar_patt->entries[pj].j, A_spar_patt->entries[pj].val); - } - fprintf(stdout,"\n"); - }*/ - - fprintf(stdout,"\n\n\nSAI computation started\n"); - fprintf(stdout, "rank = %d\n", system->my_rank); - fprintf(stdout, "n = %d N = %d\n",system->n, system->N); - fprintf(stdout, "nnz of sparsity pattern = %d\n",A_spar_patt->m); - fflush(stdout); - - j_recv1 = NULL; j_recv2 = NULL; val_recv1 = NULL; val_recv2 = NULL; - mark = (int *) malloc( sizeof(int) * (system->N + 1) ); + mark = (int *) malloc( sizeof(int) * (system->bigN + 1) ); + + // row_needed and row_nnz are used for Dist and Coll functions + // so their size should be N rather than bigN row_needed = (int *) malloc( sizeof(int) * (system->N + 1) ); row_nnz = (int *) malloc( sizeof(int) * (system->N + 1) ); - j_list = (int **) malloc( sizeof(int *) * (system->N + 1) ); - val_list = (real **) malloc( sizeof(real *) * (system->N + 1) ); + j_list = (int **) malloc( sizeof(int *) * (system->bigN + 1) ); + val_list = (real **) malloc( sizeof(real *) * (system->bigN + 1) ); - for ( i = 0; i <= system->N; ++i ) + for ( i = 0; i <= system->bigN; ++i ) { - mark[ i ] = -1; - row_needed[ i ] = -1; - row_nnz[ i ] = 0; + mark[i] = -1; + row_needed[i] = -1; + row_nnz[i] = 0; } /* mark the atoms that already have their row stored in the local matrix */ for ( i = 0; i < system->n; ++i ) { atom = &system->my_atoms[i]; - fprintf(stdout, "%d ",atom->orig_id); mark[ atom->orig_id ] = i; } - fprintf(stdout,"\n"); - fflush(stdout); /*find the atoms that are not marked but needed, * meaning we need to communicate their row*/ @@ -707,288 +691,295 @@ int sparse_approx_inverse(const reax_system * const system, storage * const work for ( pj = A_spar_patt->start[i]; pj < A_spar_patt->end[i]; ++pj ) { atom = &system->my_atoms[ A_spar_patt->entries[pj].j ]; - //fprintf(stdout,"Spar_patt column: %d - orig_id: %d\n",A_spar_patt->entries[pj].j, atom->orig_id); - //fflush(stdout); - if(atom->orig_id > system->n) - fprintf(stdout, "adsadsads %d ",atom->orig_id); if( mark[ atom->orig_id ] == -1) { - fprintf(stdout, "asd %d ",atom->orig_id); row_needed[ A_spar_patt->entries[pj].j ] = atom->orig_id; } } } - fflush(stdout); - - Dist( system, mpi_data, row_needed, INT_PTR_TYPE, MPI_INT ); + + // TODO: Dist or Coll + // Announce the rows that you need but you do not have + Coll( system, mpi_data, row_needed, INT_PTR_TYPE, MPI_INT ); - fprintf(stdout, "First Dist is done\n"); - fflush(stdout); + for(i = 0; i<system->bigN;i++) + { + fprintf(stdout,"%d ", row_needed[i]); + } - /* fill in the nnz of the lines that will be collected by other processes */ - for( i = 0; i < system->N; ++i ) + /* fill in the nnz of the rows of the original charge matrix + * that will be collected by other processes */ + for( i = 0; i < system->bigN; ++i ) { - if( row_needed[i] !=-1 && mark[ row_needed[i] ] != -1) + if( row_needed[i] != -1 && mark[ row_needed[i] ] != -1 ) { - row_nnz[i] = A->end[ mark[ row_needed[i] ] ] - A->start[ mark[ row_needed[i] ] ]; - + for( pj = A->start[i]; pj < A->end[i]; ++pj) + { + if( A->entries[pj].j < system->bigN ) + { + ++row_nnz[i]; + } + } } } - fprintf(stdout, "Required NNZ calculation is done\n"); - fflush(stdout); - /* announce the nnz's in each row to allocota space */ - Coll( system, mpi_data, row_nnz, INT_PTR_TYPE, MPI_INT ); + // TODO: Dist or Coll + /* Announce the nnz's in each row that will be communicated later */ + Dist( system, mpi_data, row_nnz, INT_PTR_TYPE, MPI_INT ); - fprintf(stdout, "First Call is done\n"); - fflush(stdout); - comm = mpi_data->comm_mesh3D; out_bufs = mpi_data->out_buffers; - for ( d = 2; d >= 0; --d ) + + fprintf(stdout,"before manual dist function\n"); + + /* Dist, not Coll */ + for ( d = 0; d < 3; ++d) { - fprintf(stdout, "iterative loop for coll %d\n", d); - fflush(stdout); + + fprintf(stdout,"iteration number: %d\n", d); flag1 = 0; flag2 = 0; /* initiate recvs */ nbr1 = &system->my_nbrs[2 * d]; - - if ( out_bufs[2 * d].cnt ) + if ( nbr1->atoms_cnt ) { + /* calculate the total data that will be received */ cnt = 0; - for( i = 0; i < out_bufs[2 * d].cnt; ++i ) + for( i = nbr1->atoms_str; i < system->bigN && i < (nbr1->atoms_str + nbr1->atoms_cnt); ++i ) { - cnt += row_nnz[ out_bufs[2 * d].index[i] ]; - // fprintf(stdout,"Outbuffs to be sent id:%d nnz:%d\n",out_bufs[2 * d+1].index[i], row_nnz[ out_bufs[2 * d+1].index[i] ]); - // fflush(stdout); + if( row_needed[i] != -1 && mark[ row_needed[i] ] == -1) + { + cnt += row_nnz[i]; + } } - j_recv1 = (int *) malloc( sizeof(int) * cnt ); - val_recv1 = (real *) malloc( sizeof(real) * cnt ); - + /* initiate Irecv */ if( cnt ) { flag1 = 1; + j_recv1 = (int *) malloc( sizeof(int) * cnt ); + val_recv1 = (real *) malloc( sizeof(real) * cnt ); MPI_Irecv( j_recv1, cnt, MPI_INT, nbr1->rank, 2 * d + 1, comm, &req1 ); MPI_Irecv( val_recv1, cnt, MPI_DOUBLE, nbr1->rank, 2 * d + 1, comm, &req2 ); } } - nbr2 = &system->my_nbrs[2 * d + 1]; - fprintf(stdout, "Irecv is completed\n"); + fprintf(stdout,"after first Irecv, cnt = %d\n",cnt); fflush(stdout); - if ( out_bufs[2 * d + 1].cnt ) + nbr2 = &system->my_nbrs[2 * d + 1]; + if ( nbr1->atoms_cnt ) { + /* calculate the total data that will be received */ cnt = 0; - for( i = 0; i < out_bufs[2 * d+1].cnt; ++i ) + for( i = nbr2->atoms_str; i < system->bigN && i < (nbr2->atoms_str + nbr2->atoms_cnt); ++i ) { - cnt += row_nnz[ out_bufs[2 * d+1].index[i] ]; + if( row_needed[i] != -1 && mark[ row_needed[i] ] == -1) + { + cnt += row_nnz[i]; + } } - j_recv2 = (int *) malloc( sizeof(int) * cnt ); - val_recv2 = (real *) malloc( sizeof(real) * cnt ); - + /* initiate Irecv */ if( cnt ) { flag2 = 1; + j_recv2 = (int *) malloc( sizeof(int) * cnt ); + val_recv2 = (real *) malloc( sizeof(real) * cnt ); MPI_Irecv( j_recv2, cnt, MPI_INT, nbr2->rank, 2 * d, comm, &req3 ); MPI_Irecv( val_recv2, cnt, MPI_DOUBLE, nbr2->rank, 2 * d, comm, &req4 ); } } - fprintf(stdout, "Second Irecv is completed\n"); + fprintf(stdout,"after second Irecv, cnt = %d\n", cnt); fflush(stdout); /* send both messages in dimension d */ - if ( nbr1->atoms_cnt ) + if( out_bufs[2 * d].cnt ) { cnt = 0; - for( i = nbr1->atoms_str; i < nbr1->atoms_str + nbr1->atoms_cnt; ++i) + for( i = 0; i < out_bufs[2 * d].cnt; ++i ) { - atom = &system->my_atoms[i]; - - if(mark[ atom->orig_id ] != -1) - { - cnt += A->end[ mark[ atom->orig_id ] ] - A->start[ mark[ atom->orig_id ] ]; - } - else + if( out_bufs[2 * d].index[i] < system->bigN ) { - cnt += row_nnz[i]; + if( row_needed[ out_bufs[2 * d].index[i] ] != -1) + { + cnt += row_nnz[ out_bufs[2 * d].index[i] ]; + } } } - fprintf(stdout, "Inside of First Send\n"); + fprintf(stdout,"cnt = %d\n", cnt); fflush(stdout); - - j_send = (int *) malloc( sizeof(int) * cnt ); - val_send = (real *) malloc( sizeof(real) * cnt ); - - cnt = 0; - for( i = nbr1->atoms_str; i < nbr1->atoms_str + nbr1->atoms_cnt; ++i) + if( cnt ) { - atom = &system->my_atoms[i]; - if(row_needed[ atom->orig_id ] != -1) + j_send = (int *) malloc( sizeof(int) * cnt ); + val_send = (real *) malloc( sizeof(real) * cnt ); + + cnt = 0; + for( i = 0; i < out_bufs[2 * d].cnt; ++i ) { - if(mark[ atom->orig_id ] != -1) + if( out_bufs[2 * d].index[i] < system->bigN ) { - for( pj = A->start[ mark[ atom->orig_id ] ]; pj < A->end[ mark[ atom->orig_id ] ]; ++pj) + if( row_needed[ out_bufs[2 * d].index[i] ] != -1) { - j_send[cnt] = A->entries[pj].j; - val_send[cnt] = A->entries[pj].val; - cnt++; - } - } - else - { - for( pj = 0; pj < row_nnz[i]; ++pj) - { - j_send[cnt] = j_list[i][pj]; - val_send[cnt] = val_list[i][pj]; - cnt++; + if( mark[ row_needed[ out_bufs[2 * d].index[i] ] ] != -1) + { + for( pj = A->start[ out_bufs[2 * d].index[i] ]; pj < A->end[ out_bufs[2 * d].index[i] ]; ++pj ) + { + if( A->entries[pj].j < system->bigN) + { + j_send[cnt] = A->entries[pj].j; + val_send[cnt] = A->entries[pj].val; + cnt++; + } + } + } + else + { + for( pj = 0; pj < row_nnz[ out_bufs[2 * d].index[i] ]; ++pj ) + { + j_send[cnt] = j_list[ out_bufs[2 * d].index[i] ][pj]; + val_send[cnt] = val_list[ out_bufs[2 * d].index[i] ][pj]; + cnt++; + } + } } } } - } - fprintf(stdout, "Still Inside of First Send\n"); - fflush(stdout); - - if( cnt ) - { MPI_Send( j_send, cnt, MPI_INT, nbr1->rank, 2 * d, comm ); MPI_Send( val_send, cnt, MPI_DOUBLE, nbr1->rank, 2 * d, comm ); } - } - fprintf(stdout, "Outside of First Send\n"); - fflush(stdout); - if ( nbr2->atoms_cnt ) + fprintf(stdout,"after first Send\n"); + fflush(stdout); + + if( out_bufs[2 * d + 1].cnt ) { - fprintf(stdout, "Beginning of Second Send\n"); - fflush(stdout); cnt = 0; - for( i = nbr2->atoms_str; i < nbr2->atoms_str + nbr2->atoms_cnt; ++i) + for( i = 0; i < out_bufs[2 * d + 1].cnt; ++i ) { - atom = &system->my_atoms[i]; - - if(mark[ atom->orig_id ] != -1) + if( out_bufs[2 * d + 1].index[i] < system->bigN ) { - cnt += A->end[ mark[ atom->orig_id ] ] - A->start[ mark[ atom->orig_id ] ]; - } - else - { - cnt += row_nnz[i]; + if( row_needed[ out_bufs[2 * d + 1].index[i] ] != -1) + { + cnt += row_nnz[ out_bufs[2 * d + 1].index[i] ]; + } } } - - fprintf(stdout, "Inside of Second Send\n"); - fflush(stdout); - - j_send = (int *) malloc( sizeof(int) * cnt ); - val_send = (real *) malloc( sizeof(real) * cnt ); - fprintf(stdout, "Inside of Second Send after Malloc\n"); + fprintf(stdout,"cnt = %d\n", cnt); fflush(stdout); - cnt = 0; - for( i = nbr2->atoms_str; i < nbr2->atoms_str + nbr2->atoms_cnt; ++i) + if( cnt ) { - atom = &system->my_atoms[i]; + j_send = (int *) malloc( sizeof(int) * cnt ); + val_send = (real *) malloc( sizeof(real) * cnt ); - if(row_needed[ atom->orig_id ] != -1) + cnt = 0; + for( i = 0; i < out_bufs[2 * d + 1].cnt; ++i ) { - if(mark[ atom->orig_id ] != -1) + if( out_bufs[2 * d + 1].index[i] < system->bigN ) { - for( pj = A->start[ mark[ atom->orig_id ] ]; pj < A->end[ mark[ atom->orig_id ] ]; ++pj) + if( row_needed[ out_bufs[2 * d + 1].index[i] ] != -1) { - j_send[cnt] = A->entries[pj].j; - val_send[cnt] = A->entries[pj].val; - cnt++; - } - } - else - { - for( pj = 0; pj < row_nnz[i]; ++pj) - { - j_send[cnt] = j_list[i][pj]; - val_send[cnt] = val_list[i][pj]; - cnt++; + if( mark[ row_needed[ out_bufs[2 * d + 1].index[i] ] ] != -1) + { + for( pj = A->start[ out_bufs[2 * d + 1].index[i] ]; pj < A->end[ out_bufs[2 * d + 1].index[i] ]; ++pj ) + { + if( A->entries[pj].j < system->bigN) + { + j_send[cnt] = A->entries[pj].j; + val_send[cnt] = A->entries[pj].val; + cnt++; + } + } + } + else + { + for( pj = 0; pj < row_nnz[ out_bufs[2 * d + 1].index[i] ]; ++pj ) + { + j_send[cnt] = j_list[ out_bufs[2 * d + 1].index[i] ][pj]; + val_send[cnt] = val_list[ out_bufs[2 * d + 1].index[i] ][pj]; + cnt++; + } + } } } } - } - fprintf(stdout, "Still Inside of Second Send\n"); - fprintf(stdout, "cnt = %d\n",cnt); - fflush(stdout); - - if ( cnt ) - { - MPI_Send( j_send, cnt, MPI_INT, nbr2->rank, 2 * d + 1, comm ); - MPI_Send( val_send, cnt, MPI_DOUBLE, nbr2->rank, 2 * d + 1, comm ); + MPI_Send( j_send, cnt, MPI_INT, nbr1->rank, 2 * d + 1, comm ); + MPI_Send( val_send, cnt, MPI_DOUBLE, nbr1->rank, 2 * d + 1, comm ); } } - if ( out_bufs[2 * d].cnt && flag1 ) + fprintf(stdout,"after the seconf Send\n"); + fflush(stdout); + + if( flag1 ) { MPI_Wait( &req1, &stat1 ); MPI_Wait( &req2, &stat2 ); + cnt = 0; - for( i = 0; i < out_bufs[2 * d].cnt; ++i ) + for( i = nbr1->atoms_str; i < system->bigN && i < (nbr1->atoms_str + nbr1->atoms_cnt); ++i ) { - j_list[ out_bufs[2 * d].index[i] ] = (int *) malloc( sizeof(int) * row_nnz[ out_bufs[2 * d].index[i] ] ); - val_list[ out_bufs[2 * d].index[i] ] = (real *) malloc( sizeof(real) * row_nnz[ out_bufs[2 * d].index[i] ] ); - - for( pj = 0; pj < row_nnz[ out_bufs[2 * d].index[i] ]; ++pj) + if( row_needed[i] != -1 && mark[ row_needed[i] ] == -1) { - j_list[ out_bufs[2 * d].index[i] ][pj] = j_recv1[cnt]; - val_list[ out_bufs[2 * d].index[i] ][pj] = val_recv1[cnt]; - cnt++; + j_list[i] = (int *) malloc( sizeof(int) * row_nnz[i] ); + val_list[i] = (real *) malloc( sizeof(real) * row_nnz[i] ); + + for( pj = 0; pj < row_nnz[i]; ++pj ) + { + j_list[i][pj] = j_recv1[cnt]; + val_list[i][pj] = val_recv1[cnt]; + cnt++; + } } } } - if ( out_bufs[2 * d + 1].cnt && flag2 ) + if( flag2 ) { MPI_Wait( &req3, &stat3 ); MPI_Wait( &req4, &stat4 ); + cnt = 0; - for( i = 0; i < out_bufs[2 * d + 1].cnt; ++i ) + for( i = nbr2->atoms_str; i < system->bigN && i < (nbr2->atoms_str + nbr2->atoms_cnt); ++i ) { - for( pj = 0; pj < row_nnz[ out_bufs[2 * d + 1].index[i] ]; ++pj) + if( row_needed[i] != -1 && mark[ row_needed[i] ] == -1) { - j_list[ out_bufs[2 * d + 1].index[i] ][pj] = j_recv2[cnt]; - val_list[ out_bufs[2 * d + 1].index[i] ][pj] = val_recv2[cnt]; - cnt++; + j_list[i] = (int *) malloc( sizeof(int) * row_nnz[i] ); + val_list[i] = (real *) malloc( sizeof(real) * row_nnz[i] ); + + for( pj = 0; pj < row_nnz[i]; ++pj ) + { + j_list[i][pj] = j_recv2[cnt]; + val_list[i][pj] = val_recv2[cnt]; + cnt++; + } } } } - fprintf(stdout,"Package placed into correct positions\n"); - fflush(stdout); - } - - fprintf(stdout,"before initilaization of SAI computation variables\n"); - fflush(stdout); + } A_app_inv->start[A_app_inv->n] = A_spar_patt->start[A_spar_patt->n]; + X = (char *) malloc( sizeof(char) * system->bigN ); + Y = (char *) malloc( sizeof(char) * system->bigN ); + pos_x = (int *) malloc( sizeof(int) * system->bigN ); + pos_y = (int *) malloc( sizeof(int) * system->bigN ); - X = (char *) malloc( sizeof(char) * A->n ); - Y = (char *) malloc( sizeof(char) * A->n ); - pos_x = (int *) malloc( sizeof(int) * A->n ); - pos_y = (int *) malloc( sizeof(int) * A->n ); - - for ( i = 0; i < A->n; ++i ) + for ( i = 0; i < system->bigN; ++i ) { X[i] = 0; Y[i] = 0; @@ -996,7 +987,8 @@ int sparse_approx_inverse(const reax_system * const system, storage * const work pos_y[i] = 0; } - fprintf(stdout, "SAI computation\n"); + fprintf(stdout,"right before SAI computation\n"); + fflush(stdout); for ( i = 0; i < A_spar_patt->n; ++i ) { @@ -1006,25 +998,27 @@ int sparse_approx_inverse(const reax_system * const system, storage * const work /* find column indices of nonzeros (which will be the columns indices of the dense matrix) */ for ( pj = A_spar_patt->start[i]; pj < A_spar_patt->end[i]; ++pj ) { - j_temp = A_spar_patt->entries[pj].j; - + atom = &system->my_atoms[j_temp]; Y[j_temp] = 1; pos_y[j_temp] = N; ++N; /* for each of those indices * search through the row of full A of that index */ - + /* the case where the local matrix has that index's row */ - if(mark[j_temp] != -1) + if( mark[ atom->orig_id ] != -1) { - for ( k = A->start[ mark[j_temp] ]; k < A->end[ mark[j_temp] ]; ++k ) + for ( k = A->start[ j_temp ]; k < A->end[ j_temp ]; ++k ) { - /* and accumulate the nonzero column indices to serve as the row indices of the dense matrix */ - X[A->entries[k].j] = 1; + if( A->entries[k].j < system->bigN ) + { + /* and accumulate the nonzero column indices to serve as the row indices of the dense matrix */ + X[A->entries[k].j] = 1; + } } - } + } /* the case where we communicated that index's row */ else @@ -1039,7 +1033,7 @@ int sparse_approx_inverse(const reax_system * const system, storage * const work /* enumerate the row indices from 0 to (# of nonzero rows - 1) for the dense matrix */ identity_pos = M; - for ( k = 0; k < A->n; k++) + for ( k = 0; k < system->bigN; k++) { if ( X[k] != 0 ) { @@ -1066,13 +1060,14 @@ int sparse_approx_inverse(const reax_system * const system, storage * const work /* change the value if any of the column indices is seen */ /* it is in the original list */ - if( mark[ pos_x[d_i] ] != -1) + atom = &system->my_atoms[ pos_x[d_i] ]; + if( mark[ atom->orig_id ] != -1) { - for ( d_j = A->start[ mark[ pos_x[d_i] ] ]; d_j < A->end[ mark[ pos_x[d_i] ] ]; ++d_j ) + for ( d_j = A->start[ pos_x[d_i] ]; d_j < A->end[ pos_x[d_i] ]; ++d_j ) { - if ( Y[A->entries[d_j].j] == 1 ) + if ( A->entries[d_j].j < system->bigN && Y[ A->entries[d_j].j ] == 1 ) { - dense_matrix[d_i * N + pos_y[A->entries[d_j].j]] = A->entries[d_j].val; + dense_matrix[ d_i * N + pos_y[ A->entries[d_j].j ] ] = A->entries[d_j].val; } } } @@ -1087,7 +1082,6 @@ int sparse_approx_inverse(const reax_system * const system, storage * const work } } } - } /* create the right hand side of the linear equation @@ -1108,9 +1102,6 @@ int sparse_approx_inverse(const reax_system * const system, storage * const work lda = N; ldb = nrhs; - fprintf(stdout,"Before LAPACKE\n"); - fflush(stdout); - info = LAPACKE_dgels( LAPACK_ROW_MAJOR, 'N', m, n, nrhs, dense_matrix, lda, e_j, ldb ); @@ -1140,7 +1131,7 @@ int sparse_approx_inverse(const reax_system * const system, storage * const work /* empty variables that will be used next iteration */ free( dense_matrix ); free( e_j ); - for ( k = 0; k < A->n; ++k ) + for ( k = 0; k < system->bigN; ++k ) { X[k] = 0; Y[k] = 0; @@ -1149,10 +1140,6 @@ int sparse_approx_inverse(const reax_system * const system, storage * const work } } - - fprintf(stdout, "SAI computation over\n"); - fflush(stdout); - free( pos_y); free( pos_x); free( Y ); @@ -1179,8 +1166,6 @@ static void apply_preconditioner( const reax_system * const system, const storag const control_params * const control, const real * const y, real * const x, const int fresh_pre, const int side ) { - //fprintf(stdout,"apply_preconditioner working\n"); - //fflush(stdout); /* no preconditioning */ if ( control->cm_solver_pre_comp_type == NONE_PC ) { @@ -1415,8 +1400,6 @@ int dual_CG( const reax_system * const system, const control_params * const cont const real tol, rvec2 * const x, const int fresh_pre ) { - fprintf(stdout,"dual_cg working\n"); - fflush(stdout); int i, j, n, N, iters; rvec2 tmp, alpha, beta; rvec2 my_sum, norm_sqr, b_norm, my_dot; @@ -1684,7 +1667,7 @@ int dual_CG( const reax_system * const system, const control_params * const cont { fprintf( stderr, "[WARNING] Dual CG convergence failed (%d iters)\n", i + 1 + iters ); } - + return (i + 1) + iters; } @@ -1722,9 +1705,6 @@ int CG( const reax_system * const system, const control_params * const control, Vector_Sum( workspace->r , 1.0, b, -1.0, workspace->q, system->n ); - apply_preconditioner( system, workspace, control, workspace->r, workspace->d, fresh_pre, LEFT ); - apply_preconditioner( system, workspace, control, workspace->d, workspace->p, fresh_pre, RIGHT ); - b_norm = Parallel_Norm( b, system->n, mpi_data->world ); sig_new = Parallel_Dot( workspace->r, workspace->d, system->n, mpi_data->world ); -- GitLab