From 05e4ecb190bddd1a4c117b212269efac077fb63d Mon Sep 17 00:00:00 2001 From: wangmeihua <13383952685@163.com> Date: Fri, 24 Oct 2025 09:31:11 +0800 Subject: [PATCH 1/6] rag --- rag/init.py | 1 - rag/ragapi.py | 9 +++++---- 2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/rag/init.py b/rag/init.py index 56bf3eb..3dcfdee 100644 --- a/rag/init.py +++ b/rag/init.py
@@ -3,7 +3,6 @@ from ahserver.serverenv import ServerEnv import aiohttp from aiohttp import ClientSession, ClientTimeout import json -from .file import file_uploaded, file_deleted from .folderinfo import RagFileMgr from .ragprogram import set_program, get_rag_programs from .ragllm_utils import get_ragllms_by_catelog
diff --git a/rag/ragapi.py b/rag/ragapi.py index 9174b00..355c20f 100644 --- a/rag/ragapi.py +++ b/rag/ragapi.py
@@ -8,6 +8,7 @@ import math import uuid from rag.service_opts import get_service_params, sor_get_service_params from rag.rag_operations import RagOperations +from langchain_core.documents import Document helptext = """kyrag API:
@@ -254,7 +255,7 @@ async def textinsert(request, params_kw, *params): result = { "status": "error", "userid": orgid, - "collection_name": "ragdb_{dbtype}", + "collection_name": f"ragdb_{db_type}", "message": "", "status_code": 400 }
@@ -277,10 +278,10 @@ async def textinsert(request, params_kw, *params): # 插入 Milvus fields = { "text": text, - "fiid": fiid, - "orgid": orgid, + "knowledge_base_id": fiid, + "userid": orgid, "vector": embedding, - "id": id + "document_id": id } chunks_data = await rag_ops.insert_to_vector_text(request, db_type, fields, service_params, userid, timings)
From 3e090c23a0bc3fe1256f84a443cd2b3f05a9ede0 Mon Sep 17 00:00:00 2001 From: wangmeihua <13383952685@163.com> Date: Mon, 27 Oct 2025 18:03:17 +0800 Subject: [PATCH 2/6] rag --- wwwroot/test_textinsert.dspy | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 wwwroot/test_textinsert.dspy
diff --git a/wwwroot/test_textinsert.dspy b/wwwroot/test_textinsert.dspy new file mode 100644 index 0000000..f22e396 --- /dev/null +++ b/wwwroot/test_textinsert.dspy
@@ -0,0 +1,9 @@ +debug(f'{params_kw=}') +text = params_kw.text +fiid = params_kw.fiid +db_type = params_kw.db_type +env = DictObject(**globals()) +keys = [k for k in env.keys()] +debug(f'{keys=}') +x = await rfexe('textinsert', request, params_kw) +return x
From 07e971296174a53bad04c89b023f714a5e7dcdb2 Mon Sep 17 00:00:00 2001 From: wangmeihua <13383952685@163.com> Date: Tue, 4 Nov 2025 15:15:26 +0800 Subject: [PATCH 3/6] rag --- json/embeddingmode.json | 16 ++++++++++++++++ models/embeddingmode.xlsx | Bin 0 -> 15810 bytes 2 files changed, 16 insertions(+) create mode 100644 json/embeddingmode.json create mode 100644 models/embeddingmode.xlsx
diff --git a/json/embeddingmode.json b/json/embeddingmode.json new file mode 100644 index 0000000..bd20da7 --- /dev/null +++ b/json/embeddingmode.json
@@ -0,0 +1,16 @@ +{ + "tblname": "embedding_mode", + "title":"嵌入模式", + "params":{ + "browserfields":{ + "exclouded":["id"], + "alters":{} + }, + "editexclouded":["id"], + "toolbar":{ + }, + "binds":[ + ] + } +} +
diff --git a/models/embeddingmode.xlsx b/models/embeddingmode.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..55a8a769113707df5d4783f488a170a9073bb456 100644
GIT binary patch literal 15810 [base85-encoded binary data omitted]
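A minimal, self-contained sketch of what PATCH 1/6 changes in textinsert: the payload keys are renamed (fiid to knowledge_base_id, orgid to userid, id to document_id) and the collection name gains the missing f-string prefix. build_text_fields and the sample values below are illustrative only and are not part of the repository.

import uuid

def build_text_fields(text, fiid, orgid, embedding):
    # Payload with the key names introduced by PATCH 1/6
    # ("fiid" -> "knowledge_base_id", "orgid" -> "userid", "id" -> "document_id").
    return {
        "text": text,
        "knowledge_base_id": fiid,
        "userid": orgid,
        "vector": embedding,
        "document_id": str(uuid.uuid4()),
    }

db_type = "textdb"  # hypothetical value, only to show the f-string behaviour
# Without the f prefix the braces are kept literally, which is what the patch fixes:
assert "ragdb_{db_type}" == "ragdb_" + "{db_type}"
assert f"ragdb_{db_type}" == "ragdb_textdb"
print(build_text_fields("示例文本", "kb-001", "org-001", [0.1, 0.2, 0.3]))

Before this fix, the error payload always reported the literal string ragdb_{dbtype} no matter which db_type was requested; the patch adds the f prefix and corrects the variable name in one change.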
zl}n0;$!#JX;rzqutq|K#`H3xt5+rNu%g>evXJr?m{8ZRRK9j{@dr~B;kfsMPH|Si0 zna1CZ=D#F`j|NS9>@={D#Km>IvjU3jtD96RKpYa%HVfL)~T!7>^XWumJG047i*)GfrtP z){=SACiF@R%U*VhYL6vuZdkUG0rwn~~x4snUz)bzSV2mfr zb)~@ff;w?K3a%ZS7OZ8d%5*9?!g-&{uC(u~&u+_!VFwjhRt;(26W3Cv6A}mq#5_|D zTPx#Cq;6W67@Dnq-&?wlCBQ(9HCeMn86;iB88d52n?N6#mAco6C^uL7-h`uewpzKe zsTD!ZC+z81>(rDsHmI9yYKovnw64!!YxcgRB4+_7LU^v&q%0{*!pv_ajl4m*`^Bwc zwb)E*V>&SdY5SV>>opR&`hye63H>on!WA0R=m^(LJbtZ*?!Sd9fM@gXwm8lf42t^zNYFH)`8Gh zR3AKWs}d^r3dvN!3Ai~~n$Oid0>hDby1#yThS+?Mt?b4RBvGo7#@$wT(SiqwAb~7& zBSEffh>a1WVP*pq);%NXXpH8vh^W$`e5W#?pOmF8yvZ(VexKMw|24Nplyb zuu>I6mN5%mt-u%kLz+(cI2Qk4A`K{H{y`+7P@&~v73|v8VDj3PU?@hwulB-VCYx^r zAp?G&C{bYSO{_SRxiMu(|Eywad1GNcX3M)de<96Q2vE}pqBHqbdH>r?tGdg3E|%U| z;Dlmxj!AfV#?Gy%u^9f_y>~xmgt3=<1zF{yJJ`P=8FlmtD7!6>EE)~f+->;m^)AAV zA)eB%EYAcegq;ntAIsFR94;m?cZwfePIIvG^CV18@_~CbN{vzQc3~X#&}rRubRR=F zNMMvSR+Uq32-#C82Aqq#Rfp~%A{9*T--Sj75>Qa`jS6sR^!E?&fa#l;*jI?{{U&5oDP!MH1Ght) zF-r0@+774Wif;!_M~B&hGtvsK5_6+#K=~;shg@4y9+^H(Z2#K&%cw_^;{CQ8 zzv8@H4rqmo$-#%XvpvKa*~e))Q{se{Z1vn@hznjcB?(1tQEu8|I=P-{zFmoIQWH&C z8r%K=2C1A?_C@(aVl;7yaETlqOl6529ZYeFcUNB*@$*S362UEZp_1~|wLxgYG~8|a zcbc<1`_(a;Zgn)@zJ6Z>+^J3+sNiP*3iuKo6Sp--40S1JZh$mbTVM4fM!>DJnR$^tUA`n1}=orUH{3H9J->LH9lE|5I**}mRn3EQvQxosz*>W z&s(Y)>6<^SFQx<-%)^%$$Bm}PTjZ6-Gzyv-J791S%Oeyvhk-r8_XOCcCA&p3Q}|s` zIOChcPCA^tWr)?#R)O0u8<8RRltI!${F{)iT+VExOMS#>PdTGkCc?A9kLM=WYya29J&!UrbZtynp_9 z{^lo2i2cD9MD=TQ|HnF|LcZl>sD2nI=r z$Q)Yb+|_GFik!*3-IzOH+pap=;-b33O@%ZMXZaY0r>18)q|e>7RtrD8a;2<4Sd&Cb zvlF3E6b?pZ4oroq2_${edwwu|Zck=#pum~DXutb<6qhX6?@K$-dp|}u`C^Mn&*Sy| z@;NSQ((3^`h{j`pAF8$<`DD+U+UBxF}#-4!tBUGP*N#BIW`%! zNkR)uSqP?Fu&UqUP!j?Y?n!Z;xe>LacSSORisBpvaMF!DntQ2+6k<=R9fFG3%&$W@ zV4SPMj*DD(s!xytAP63=5RFMYM4uEEtb8w2*-vF_R1usU5XB?Dp89sRpW0$Qp<1jT z#+aB0v$0fSnZ%9M2-K=Ohc$mi?kjpQbQ#;XwInEuK(x9L?HvojaUA(^GPi&_S=z3$=2(xqxs?7U6-Mdw$ zj-7?2ZJ0V&cA zg0&rg*L(8!+vkd3r6q~c@{pfq4C17SGo3k7l;z9fWDzRj;MFJgW9_h#wHo~{JeY+q zE5$XiNg3?k24Tw{(Eea;U3xOP&7G1bFvUdmO))0J(IOf>+|MI#xeMus5CjCrl$|5b z*oCurPW(?5(ume^$T0wA&Fs zX4XfKBy<<>;y2xyct#?vPGqbAv(k733FZDw*l56f+~>X3gVq+9Z57Vui=E>h1+kAW zvt`-G)1zlu|4k0b4&oz|YlXzf79AZTq5Y0bxoU z(yH{6O#kk3Uu9jLkJAncSZkCb1vElMwPlq2{kq9ePbz*zOj1shq)i9UF?AAKG1srF zh2+O@nNon5w-XyRnDXeSmwjqaL^0JBl=N?PPJI2E>w!7C)R4O-qV)W51z3~AL)D*S zcgB{fl!2_iVrhu(=B9NZlBr>nUGhGQ_mUf%qXO*K*~VC4r;EH@y5;;m9fL%`a~!O+U# zv!bhwp}p2yP+=lrOS+dHE$BkZBdDOsh$=gm0gm?BzJ%l}==^OsjfLf$0@CJY9WWUZ z*gpM^(a2)v)?LON%?bRz5dL_#u93s|s$3X&c7a?9{?MuZ4a#O6P{*|RH(`Nsj4u|` zUG0Yv7#Q@~W)Tb$9ZiVod6GFuSlFI~ShjW-PIG}}|6r4;_Nk&Ly2kWzu1!Wt{`2rG&dLH1s((h^u zY-I^LGv(B@(%6NpLm$U00u9maNjx&`B?CZzoa1%GEGI`U`p71O+!IBy)-F|zR@Nlp zd&TsPfR|2#8psf!UmmYoaeblO2f7?<9w}mo8>~Jvotw}5Wbg>(gP{Eiq5iJz`_pOw!vO&U z5dyxYSpTC9e*^sYA?yv{cbT9M)87NyKZX7|q`h(c7FzHxo?oxv|0V2g7xDM;R+IR4 z{9F0rKji`aQNZ{{{9E3Ke`ouvobgW;{aJSRH+L1FyZ=k*{}kc<$^K^r+28EMfPnVj z?Ek7K`@bF)& zP|klq`IRI7zfk^2?)@9(6` Date: Wed, 12 Nov 2025 15:15:52 +0800 Subject: [PATCH 4/6] rag --- rag/folderinfo.py | 112 +++++++++- rag/rag_operations.py | 505 +++++++++++++++++++++++++++++++++++++----- rag/service_opts.py | 27 ++- rag/uapi_service.py | 37 ++++ 4 files changed, 613 insertions(+), 68 deletions(-) diff --git a/rag/folderinfo.py b/rag/folderinfo.py index 89215eb..afd1b9e 100644 --- a/rag/folderinfo.py +++ b/rag/folderinfo.py @@ -17,13 +17,15 @@ import traceback from filetxt.loader import fileloader,File2Text from ahserver.serverenv import get_serverenv from typing import List, Dict, Any -from rag.service_opts import get_service_params, sor_get_service_params +from rag.service_opts import get_service_params, sor_get_service_params, 
sor_get_embedding_mode, get_embedding_mode +from rag.fileprocess import extract_images_from_file from rag.rag_operations import RagOperations import json from rag.transaction_manager import TransactionContext from dataclasses import dataclass from enum import Enum - +import base64 +from pathlib import Path class RagFileMgr(FileMgr): def __init__(self, fiid): @@ -53,6 +55,10 @@ where a.orgid = b.orgid return r.quota, r.expired_date return None, None + async def file_to_base64(self,path: str) -> str: + with open(path, "rb") as f: + return base64.b64encode(f.read()).decode("utf-8") + async def file_uploaded(self, request, ns, userid): """将文档插入 Milvus 并抽取三元组到 Neo4j""" debug(f'Received ns: {ns=}') @@ -104,21 +110,107 @@ where a.orgid = b.orgid raise ValueError("无法获取服务参数") rollback_context["service_params"] = service_params + #获取嵌入模式 + embedding_mode = await get_embedding_mode(orgid) + debug(f"检测到 embedding_mode = {embedding_mode}(0=文本, 1=多模态)") + # 加载和分片文档 chunks = await self.rag_ops.load_and_chunk_document( realpath, timings, transaction_mgr=transaction_mgr ) - # 生成嵌入向量 - embeddings = await self.rag_ops.generate_embeddings( - request, chunks, service_params, userid, timings, transaction_mgr=transaction_mgr - ) + text_embeddings = None + multi_results = None + image_paths = [] - # 插入 Milvus - chunks_data = await self.rag_ops.insert_to_vector_db( - request, chunks, embeddings, realpath, orgid, fiid, id, - service_params, userid, db_type, timings, transaction_mgr=transaction_mgr + if embedding_mode == 1: + inputs = [] + # 文本 + for chunk in chunks: + inputs.append({"type": "text", "content": chunk.page_content}) + + debug("开始多模态图像抽取与嵌入") + image_paths = extract_images_from_file(realpath) + debug(f"从文档中抽取 {len(image_paths)} 张图像") + + if image_paths: + for img_path in image_paths: + try: + # 1. 自动识别真实格式 + ext = Path(img_path).suffix.lower() + if ext not in {".png", ".jpg", ".jpeg", ".webp", ".bmp"}: + ext = ".jpg" + + mime_map = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".webp": "image/webp", + ".bmp": "image/bmp" + } + mime_type = mime_map.get(ext, "image/jpeg") + + # # 2. 
智能压缩(>1MB 才压缩,节省 70% 流量) + # img = Image.open(img_path).convert("RGB") + # if os.path.getsize(img_path) > 1024 * 1024: # >1MB + # buffer = BytesIO() + # img.save(buffer, format="JPEG", quality=85, optimize=True) + # b64 = base64.b64encode(buffer.getvalue()).decode() + # data_uri = f"data:image/jpeg;base64,{b64}" + # else: + b64 = await self.file_to_base64(img_path) + data_uri = f"data:{mime_type};base64,{b64}" + + inputs.append({ + "type": "image", + "data": data_uri + }) + debug(f"已添加图像({mime_type}, {len(b64) / 1024:.1f}KB): {Path(img_path).name}") + + except Exception as e: + debug(f"图像处理失败,跳过: {img_path} → {e}") + # 即使失败也加个占位,防止顺序错乱 + inputs.append({ + "type": "image", + "data": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=" + }) + + debug(f"混排输入总数: {len(inputs)}(文本 {len(chunks)} + 图像 {len(image_paths)})") + + multi_results = await self.rag_ops.generate_multi_embeddings( + request=request, + inputs=inputs, + service_params=service_params, + userid=userid, + timings=timings, + transaction_mgr=transaction_mgr + ) + debug(f"多模态嵌入成功,返回 {len(multi_results)} 条结果") + else: + # 生成嵌入向量 + debug("【纯文本模式】使用 BGE 嵌入") + text_embeddings = await self.rag_ops.generate_embeddings( + request, chunks, service_params, userid, timings, transaction_mgr=transaction_mgr + ) + debug(f"BGE 嵌入完成: {len(text_embeddings)} 条") + + inserted = await self.rag_ops.insert_all_vectors( + request=request, + text_chunks=chunks, + realpath=realpath, + orgid=orgid, + fiid=fiid, + document_id=id, + service_params=service_params, + userid=userid, + db_type=db_type, + timings=timings, + img_paths=image_paths, + text_embeddings=text_embeddings, + multi_results=multi_results, + transaction_mgr=transaction_mgr ) + debug(f"统一插入: 文本 {inserted['text']}, 图像 {inserted['image']}, 人脸 {inserted['face']}") # 抽取三元组 triples = await self.rag_ops.extract_triples( diff --git a/rag/rag_operations.py b/rag/rag_operations.py index 0280397..06e20fd 100644 --- a/rag/rag_operations.py +++ b/rag/rag_operations.py @@ -103,84 +103,477 @@ class RagOperations: return embeddings - async def insert_to_vector_db(self, request, chunks: List[Document], embeddings: List[List[float]], - realpath: str, orgid: str, fiid: str, id: str, service_params: Dict, - userid: str, db_type: str, timings: Dict, - transaction_mgr: TransactionManager = None): - """插入向量数据库""" - debug(f"准备数据并调用插入文件端点: {realpath}") - filename = os.path.basename(realpath).rsplit('.', 1)[0] - ext = realpath.rsplit('.', 1)[1].lower() if '.' 
in realpath else '' + async def generate_multi_embeddings(self, request, inputs: List[Dict], service_params: Dict, + userid: str, timings: Dict, + transaction_mgr: TransactionManager = None) -> Dict[str, Dict]: + """调用多模态嵌入服务(CLIP)""" + debug("调用多模态嵌入服务") + start = time.time() + + result = await self.api_service.get_multi_embeddings( + request=request, + inputs=inputs, + upappid=service_params['embedding'], + apiname="black/clip", + user=userid + ) + debug(f"多模态返回结果是{result}") + timings["multi_embedding"] = time.time() - start + debug(f"多模态嵌入耗时: {timings['multi_embedding']:.2f}秒,处理 {len(result)} 条") + + # ==================== 新增:错误检查 + 过滤 ==================== + valid_results = {} + error_count = 0 + error_examples = [] + + for key, info in result.items(): + if info.get("type") == "error": + error_count += 1 + if len(error_examples) < 3: # 只记录前3个 + error_examples.append(f"{key} → {info['error']}") + # 直接丢弃错误条目 + continue + valid_results[key] = info + + if error_count > 0: + error(f"多模态嵌入失败 {error_count} 条!示例:{'; '.join(error_examples)}") + raise RuntimeError(f"多模态嵌入有{error_count} 条失败") + else: + debug("多模态嵌入全部成功!") + + if transaction_mgr: + transaction_mgr.add_operation( + OperationType.EMBEDDING, + {'count': len(result)} + ) + + return result + + # 统一插入向量库 + async def insert_all_vectors( + self, + request, + text_chunks: List[Document], + realpath: str, + orgid: str, + fiid: str, + document_id: str, + service_params: Dict, + userid: str, + db_type: str, + timings: Dict, + img_paths: List[str] = None, + text_embeddings: List[List[float]] = None, + multi_results: Dict = None, + transaction_mgr: TransactionManager = None + ) -> Dict[str, int]: + """ + 统一插入函数:支持两种模式 + 1. 纯文本模式:text_embeddings 有值 + 2. 多模态模式:multi_results 有值(来自 generate_multi_embeddings) + """ + img_paths = img_paths or [] + all_chunks = [] + start = time.time() + filename = os.path.basename(realpath) upload_time = datetime.now().isoformat() - chunks_data = [ - { - "userid": orgid, - "knowledge_base_id": fiid, - "text": chunk.page_content, - "vector": embeddings[i], - "document_id": id, - "filename": filename + '.' + ext, - "file_path": realpath, - "upload_time": upload_time, - "file_type": ext, - } - for i, chunk in enumerate(chunks) - ] + # ==================== 1. 纯文本模式(BGE) ==================== + if text_embeddings is not None: + debug(f"【纯文本模式】插入 {len(text_embeddings)} 条文本向量") + for i, chunk in enumerate(text_chunks): + all_chunks.append({ + "userid": orgid, + "knowledge_base_id": fiid, + "text": chunk.page_content, + "vector": text_embeddings[i], + "document_id": document_id, + "filename": filename, + "file_path": realpath, + "upload_time": upload_time, + "file_type": "text", + }) - start_milvus = time.time() - for i in range(0, len(chunks_data), 10): - batch_chunks = chunks_data[i:i + 10] - debug(f"传入的数据是:{batch_chunks}") + # ==================== 2. 
多模态模式(CLIP 混排) ==================== + if multi_results is not None: + debug(f"【多模态模式】解析 {len(multi_results)} 条 CLIP 结果") + # 遍历 multi_results + for raw_key, info in multi_results.items(): + typ = info["type"] + + # --- 文本 --- + if typ == "text": + # raw_key 就是原文 + all_chunks.append({ + "userid": orgid, + "knowledge_base_id": fiid, + "text": raw_key, + "vector": info["vector"], + "document_id": document_id, + "filename": filename, + "file_path": realpath, + "upload_time": upload_time, + "file_type": "text", + }) + continue + + # --- 图像 --- + if typ == "image": + img_path = info.get("path") or raw_key + img_name = os.path.basename(img_path) + + # 整图向量 + if "vector" in info: + all_chunks.append({ + "userid": orgid, + "knowledge_base_id": fiid, + "text": f"[Image: {img_path}]图片来源于文件{realpath}", + "vector": info["vector"], + "document_id": document_id, + "filename": img_name, + "file_path": realpath, + "upload_time": upload_time, + "file_type": "image", + }) + + # 人脸向量 + face_vecs = info.get("face_vecs", []) + face_count = len(face_vecs) + # if face_count > 0: + # for f_idx, fvec in enumerate(face_vecs): + # debug(f"人脸向量维度是:{len(fvec)}") + # all_chunks.append({ + # "userid": orgid, + # "knowledge_base_id": fiid, + # "text": f"[Face {f_idx + 1}/{face_count} in {img_name}]人脸来源于{realpath}的{img_path}图片", + # "vector": fvec, + # "document_id": document_id, + # "filename": img_name, + # "file_path": realpath, + # "upload_time": upload_time, + # "file_type": "face", + # }) + # continue + + # --- 视频 --- + if typ == "video": + video_path = info.get("path") or raw_key + video_name = os.path.basename(video_path) + + if "vector" in info: + all_chunks.append({ + "userid": orgid, + "knowledge_base_id": fiid, + "text": f"[Video: {video_name}]", + "vector": info["vector"], + "document_id": document_id, + "filename": video_path, + "file_path": realpath, + "upload_time": upload_time, + "file_type": "video", + }) + + # 视频人脸 + face_vecs = info.get("face_vecs", []) + face_count = len(face_vecs) + # if face_count > 0 : + # for f_idx, fvec in enumerate(face_vecs): + # all_chunks.append({ + # "userid": orgid, + # "knowledge_base_id": fiid, + # "text": f"[Face {f_idx + 1}/{face_count} in video {video_name}]来源于{video_path}", + # "vector": fvec, + # "document_id": document_id, + # "filename": video_path, + # "file_path": realpath, + # "upload_time": upload_time, + # "file_type": "face", + # }) + # continue + + # --- 音频 --- + if typ == "audio": + audio_path = info.get("path") or raw_key + audio_name = os.path.basename(audio_path) + + if "vector" in info: + all_chunks.append({ + "userid": orgid, + "knowledge_base_id": fiid, + "text": f"[Audio: {audio_name}]", + "vector": info["vector"], + "document_id": document_id, + "filename": audio_path, + "file_path": realpath, + "upload_time": upload_time, + "file_type": "audio", + }) + continue + + # --- 未知类型 --- + debug(f"未知类型跳过: {typ} → {raw_key}") + + # ==================== 3. 
批量插入 Milvus ==================== + if not all_chunks: + debug("无向量需要插入") + return {"text": 0, "image": 0, "face": 0} + + for i in range(0, len(all_chunks), 10): + batch = all_chunks[i:i + 10] result = await self.api_service.milvus_insert_document( request=request, - chunks=batch_chunks, - db_type=db_type, + chunks=batch, upappid=service_params['vdb'], apiname="milvus/insertdocument", - user=userid + user=userid, + db_type=db_type ) if result.get("status") != "success": - raise ValueError(result.get("message", "Milvus 插入失败")) + raise ValueError(f"Milvus 插入失败: {result.get('message')}") - timings["insert_milvus"] = time.time() - start_milvus - debug(f"Milvus 插入耗时: {timings['insert_milvus']:.2f} 秒") - - # 记录事务操作,包含回滚函数 - if transaction_mgr: - async def rollback_vdb_insert(data, context): + # ==================== 4. 统一回滚(只登记一次) ==================== + if transaction_mgr and all_chunks: + async def rollback_all(data, context): try: - # 防御性检查 - required_context = ['request', 'service_params', 'userid'] - missing_context = [k for k in required_context if k not in context or context[k] is None] - if missing_context: - raise ValueError(f"回滚上下文缺少字段: {', '.join(missing_context)}") - - required_data = ['orgid', 'realpath', 'fiid', 'id', 'db_type'] - missing_data = [k for k in required_data if k not in data or data[k] is None] - if missing_data: - raise ValueError(f"VDB_INSERT 数据缺少字段: {', '.join(missing_data)}") - await self.delete_from_vector_db( - context['request'], data['orgid'], data['realpath'], - data['fiid'], data['id'], context['service_params'], - context['userid'], data['db_type'] + request=context['request'], + orgid=data['orgid'], + realpath=data['realpath'], + fiid=data['fiid'], + id=data['document_id'], + service_params=context['service_params'], + userid=context['userid'], + db_type=data['db_type'] ) - return f"已回滚向量数据库插入: {data['id']}" + return f"已回滚 document_id={data['document_id']} 的所有向量" except Exception as e: - error(f"回滚向量数据库失败: document_id={data.get('id', '未知')}, 错误: {str(e)}") + error(f"统一回滚失败: {e}") raise transaction_mgr.add_operation( OperationType.VDB_INSERT, { - 'orgid': orgid, 'realpath': realpath, 'fiid': fiid, - 'id': id, 'db_type': db_type + 'orgid': orgid, + 'realpath': realpath, + 'fiid': fiid, + 'id': document_id, + 'db_type': db_type }, - rollback_func=rollback_vdb_insert + rollback_func=rollback_all ) - return chunks_data + # ==================== 5. 统计返回 ==================== + stats = { + "text": len([c for c in all_chunks if c["file_type"] == "text"]), + "image": len([c for c in all_chunks if c["file_type"] == "image"]), + "face": len([c for c in all_chunks if c["file_type"] == "face"]) + } + + timings["insert_all"] = time.time() - start + debug( + f"统一插入完成: 文本 {stats['text']}, 图像 {stats['image']}, 人脸 {stats['face']}, 耗时 {timings['insert_all']:.2f}s") + return stats + # async def insert_to_vector_db(self, request, chunks: List[Document], embeddings: List[List[float]], + # realpath: str, orgid: str, fiid: str, id: str, service_params: Dict, + # userid: str, db_type: str, timings: Dict, + # transaction_mgr: TransactionManager = None): + # """插入向量数据库""" + # debug(f"准备数据并调用插入文件端点: {realpath}") + # filename = os.path.basename(realpath).rsplit('.', 1)[0] + # ext = realpath.rsplit('.', 1)[1].lower() if '.' in realpath else '' + # upload_time = datetime.now().isoformat() + # + # chunks_data = [ + # { + # "userid": orgid, + # "knowledge_base_id": fiid, + # "text": chunk.page_content, + # "vector": embeddings[i], + # "document_id": id, + # "filename": filename + '.' 
+ ext, + # "file_path": realpath, + # "upload_time": upload_time, + # "file_type": ext, + # } + # for i, chunk in enumerate(chunks) + # ] + # + # start_milvus = time.time() + # for i in range(0, len(chunks_data), 10): + # batch_chunks = chunks_data[i:i + 10] + # debug(f"传入的数据是:{batch_chunks}") + # result = await self.api_service.milvus_insert_document( + # request=request, + # chunks=batch_chunks, + # db_type=db_type, + # upappid=service_params['vdb'], + # apiname="milvus/insertdocument", + # user=userid + # ) + # if result.get("status") != "success": + # raise ValueError(result.get("message", "Milvus 插入失败")) + # + # timings["insert_milvus"] = time.time() - start_milvus + # debug(f"Milvus 插入耗时: {timings['insert_milvus']:.2f} 秒") + # + # # 记录事务操作,包含回滚函数 + # if transaction_mgr: + # async def rollback_vdb_insert(data, context): + # try: + # # 防御性检查 + # required_context = ['request', 'service_params', 'userid'] + # missing_context = [k for k in required_context if k not in context or context[k] is None] + # if missing_context: + # raise ValueError(f"回滚上下文缺少字段: {', '.join(missing_context)}") + # + # required_data = ['orgid', 'realpath', 'fiid', 'id', 'db_type'] + # missing_data = [k for k in required_data if k not in data or data[k] is None] + # if missing_data: + # raise ValueError(f"VDB_INSERT 数据缺少字段: {', '.join(missing_data)}") + # + # await self.delete_from_vector_db( + # context['request'], data['orgid'], data['realpath'], + # data['fiid'], data['id'], context['service_params'], + # context['userid'], data['db_type'] + # ) + # return f"已回滚向量数据库插入: {data['id']}" + # except Exception as e: + # error(f"回滚向量数据库失败: document_id={data.get('id', '未知')}, 错误: {str(e)}") + # raise + # + # transaction_mgr.add_operation( + # OperationType.VDB_INSERT, + # { + # 'orgid': orgid, 'realpath': realpath, 'fiid': fiid, + # 'id': id, 'db_type': db_type + # }, + # rollback_func=rollback_vdb_insert + # ) + # + # return chunks_data + # + # async def insert_image_vectors( + # self, + # request, + # multi_results: Dict[str, Dict], + # realpath: str, + # orgid: str, + # fiid: str, + # document_id: str, + # service_params: Dict, + # userid: str, + # db_type: str, + # timings: Dict, + # transaction_mgr: TransactionManager = None + # ) -> tuple[int, int]: + # + # start = time.time() + # image_chunks = [] + # face_chunks = [] + # + # for img_path, info in multi_results.items(): + # # img_name = os.path.basename(img_path) + # + # # 1. 插入整张图 + # if info.get("type") in ["image", "video"] and "vector" in info: + # image_chunks.append({ + # "userid": orgid, + # "knowledge_base_id": fiid, + # "text": f"[Image: {img_path}]", + # "vector": info["vector"], + # "document_id": document_id, + # "filename": os.path.basename(realpath), + # "file_path": realpath, + # "upload_time": datetime.now().isoformat(), + # "file_type": "image" + # }) + # + # # 2. 
插入每张人脸 + # face_vecs = info.get("face_vecs") + # face_count = info.get("face_count", 0) + # + # if face_count > 0 and face_vecs and len(face_vecs) == face_count: + # for idx, face_vec in enumerate(face_vecs): + # face_chunks.append({ + # "userid": orgid, + # "knowledge_base_id": fiid, + # "text": f"[Face {idx + 1}/{face_count} in {img_path}]", + # "vector": face_vec, + # "document_id": document_id, + # "filename": os.path.basename(realpath), + # "file_path": realpath, + # "upload_time": datetime.now().isoformat(), + # "file_type": "face", + # }) + # + # if image_chunks: + # for i in range(0, len(image_chunks), 10): + # await self.api_service.milvus_insert_document( + # request=request, + # chunks=image_chunks[i:i + 10], + # upappid=service_params['vdb'], + # apiname="milvus/insertdocument", + # user=userid, + # db_type=db_type + # ) + # + # if face_chunks: + # for i in range(0, len(face_chunks), 10): + # await self.api_service.milvus_insert_document( + # request=request, + # chunks=face_chunks[i:i + 10], + # upappid=service_params['vdb'], + # apiname="milvus/insertdocument", + # user=userid, + # db_type=db_type + # ) + # timings["insert_images"] = time.time() - start + # image_count = len(image_chunks) + # face_count = len(face_chunks) + # + # debug(f"多模态插入完成: 图像 {image_count} 条, 人脸 {face_count} 条") + # + # if transaction_mgr and (image_count + face_count > 0): + # transaction_mgr.add_operation( + # OperationType.IMAGE_VECTORS_INSERT, + # {"images": image_count, "faces": face_count, "document_id": document_id} + # ) + # + # # 记录事务操作,包含回滚函数 + # if transaction_mgr: + # async def rollback_multimodal(data, context): + # try: + # # 防御性检查 + # required_context = ['request', 'service_params', 'userid'] + # missing_context = [k for k in required_context if k not in context or context[k] is None] + # if missing_context: + # raise ValueError(f"回滚上下文缺少字段: {', '.join(missing_context)}") + # + # required_data = ['orgid', 'realpath', 'fiid', 'id', 'db_type'] + # missing_data = [k for k in required_data if k not in data or data[k] is None] + # if missing_data: + # raise ValueError(f"多模态回滚数据缺少字段: {', '.join(missing_data)}") + # + # await self.delete_from_vector_db( + # context['request'], data['orgid'], data['realpath'], + # data['fiid'], data['id'], context['service_params'], + # context['userid'], data['db_type'] + # ) + # return f"已回滚多模态向量: {data['id']}" + # except Exception as e: + # error(f"多模态回滚向量数据库失败: document_id={data.get('id', '未知')}, 错误: {str(e)}") + # raise + # + # transaction_mgr.add_operation( + # OperationType.VDB_INSERT, + # { + # 'orgid': orgid, 'realpath': realpath, 'fiid': fiid, + # 'id': id, 'db_type': db_type + # }, + # rollback_func=rollback_multimodal + # ) + # + # return image_count, face_count async def insert_to_vector_text(self, request, db_type: str, fields: Dict, service_params: Dict, userid: str, timings: Dict) -> List[Dict]: diff --git a/rag/service_opts.py b/rag/service_opts.py index 37f3b77..9cb7248 100644 --- a/rag/service_opts.py +++ b/rag/service_opts.py @@ -57,11 +57,12 @@ async def sor_get_service_params(sor, orgid): service_params['reranker'] = service['upappid'] elif name == 'mrebel三元组抽取': service_params['triples'] = service['upappid'] - elif name == 'neo4j删除知识库': + elif name == 'neo4j图知识库': service_params['gdb'] = service['upappid'] elif name == 'small实体抽取': service_params['entities'] = service['upappid'] - + elif name == 'clip多模态嵌入服务': + service_params['embedding'] = service['upappid'] # 检查是否所有服务参数都已填充 missing_services = [k for k, v in service_params.items() if v 
is None] if missing_services: @@ -76,3 +77,25 @@ async def get_service_params(orgid): async with db.sqlorContext(dbname) as sor: return await sor_get_service_params(sor, orgid) return None + +async def sor_get_embedding_mode(sor, orgid) -> int: + """根据 orgid 获取嵌入模式:0=纯文本,1=多模态""" + sql = """ + SELECT em.mode + FROM service_opts so + JOIN embedding_mode em ON so.embedding_id = em.embeddingid + WHERE so.orgid = ${orgid}$ + """ + rows = await sor.sqlExe(sql, {"orgid": orgid}) + if not rows: + debug(f"orgid={orgid} 未配置 embedding_mode,默认为 0(纯文本)") + return 0 + return int(rows[0].mode) + +async def get_embedding_mode(orgid): + db = DBPools() + debug(f"传入的orgid是:{orgid}") + dbname = get_serverenv('get_module_dbname')('rag') + async with db.sqlorContext(dbname) as sor: + return await sor_get_embedding_mode(sor, orgid) + return None \ No newline at end of file diff --git a/rag/uapi_service.py b/rag/uapi_service.py index c429dc5..1d2ed6c 100644 --- a/rag/uapi_service.py +++ b/rag/uapi_service.py @@ -45,6 +45,43 @@ class APIService: error(f"request #{request_id} 嵌入服务调用失败: {str(e)}, upappid={upappid}, apiname={apiname}") raise RuntimeError(f"嵌入服务调用失败: {str(e)}") + #多模态嵌入服务 + async def get_multi_embeddings( + self, + request, + inputs: List[Dict], + upappid: str, + apiname: str, + user: str + ) -> Dict[str, Dict]: + """ + 多模态统一嵌入(支持文本、图片、音频、视频) + 返回原始输入字符串为 key 的完整结果,含 type / vector / 人脸信息 + """ + request_id = str(uuid.uuid4()) + debug(f"Request #{request_id} 多模态嵌入开始,共{len(inputs)}项") + + if not inputs or not isinstance(inputs, list): + raise ValueError("inputs 必须为非空列表") + + try: + uapi = UAPI(request, DictObject(**globals())) + params_kw = {"inputs": inputs} + b = await uapi.call(upappid, apiname, user, params_kw) + d = await self.handle_uapi_response(b, upappid, apiname, "多模态嵌入服务", request_id) + + if d.get("object") != "embedding.result" or "data" not in d: + error(f"request #{request_id} 返回格式错误: {d}") + raise RuntimeError("多模态嵌入返回格式错误") + + result = d["data"] # 直接返回 {input_str: {type, vector, ...}} + debug(f"request #{request_id} 成功获取 {len(result)} 条多模态向量") + return result + + except Exception as e: + error(f"request #{request_id} 多模态嵌入失败: {str(e)}") + raise RuntimeError(f"多模态嵌入失败: {str(e)}") + # 实体提取服务 (LTP/small) async def extract_entities(self, request, query: str, upappid: str, apiname: str, user: str) -> list: """调用实体识别服务""" From 985c5a998a3105e6811ba7067b8e1bd03088da47 Mon Sep 17 00:00:00 2001 From: wangmeihua <13383952685@163.com> Date: Wed, 12 Nov 2025 15:52:03 +0800 Subject: [PATCH 5/6] rag --- rag/fileprocess.py | 341 +++++++++++++++++++++++++++++++++++++ rag/transaction_manager.py | 1 - 2 files changed, 341 insertions(+), 1 deletion(-) create mode 100644 rag/fileprocess.py diff --git a/rag/fileprocess.py b/rag/fileprocess.py new file mode 100644 index 0000000..87e4863 --- /dev/null +++ b/rag/fileprocess.py @@ -0,0 +1,341 @@ +import numpy as np +import os +import re +from pdf2image import convert_from_path +from appPublic.log import debug, error, info +from pathlib import Path +import zipfile +import xml.etree.ElementTree as ET +from PIL import Image +from typing import List + +# ==================== 新增:路径安全化函数 ==================== +def safe_filename(name: str) -> str: + """ + 安全化文件名/目录名 + - 去除首尾空格 + - 多个空格 → 单空格 + - 非法字符 → 下划线 + - 空格 → 下划线(推荐!永不炸) + """ + name = name.strip() + name = re.sub(r'\s+', ' ', name) # 多个空格合并 + name = re.sub(r'[<>:"/\\|?*]', '_', name) # 非法字符 + name = name.replace(' ', '_') # 空格 → 下划线(关键!) 
+ return name + +def render_pdf_to_images(pdf_path, base_output_dir, dpi=200, image_format="PNG")-> List[str]: + """ + 将PDF文件的每一页渲染为图片 + + 参数: + pdf_path (str): PDF文件路径 + page_output_dir (str): 输出图片的目录 + dpi (int): 图片分辨率,默认200 + image_format (str): 图片格式,默认PNG + + 返回: + int: 成功渲染的页面数量 + """ + pdf_filename = safe_filename(Path(pdf_path).stem) + page_output_dir = os.path.join(base_output_dir, pdf_filename) + + # 创建输出目录(如果不存在) + if not os.path.exists(page_output_dir): + os.makedirs(page_output_dir, exist_ok=True) + debug(f"创建输出目录: {page_output_dir}") + + try: + # 检查PDF文件是否存在 + if not os.path.exists(pdf_path): + error(f"PDF文件不存在: {pdf_path}") + return [] + + debug(f"开始渲染PDF: {pdf_path}") + debug(f"输出目录: {page_output_dir}") + debug(f"分辨率: {dpi} DPI, 格式: {image_format}") + + # 渲染PDF页面为图片 + pages = convert_from_path(pdf_path, dpi=dpi) + + debug(f"PDF总页数: {len(pages)}") + debug("📄 正在渲染 PDF 页面...") + + img_paths = [] + for i, page in enumerate(pages, start=1): + try: + # 生成图片文件路径 + img_path = os.path.join(page_output_dir, f"page_{i:03d}.{image_format.lower()}") + img_paths.append(img_path) + # 保存图片 + page.save(img_path, image_format) + debug(f"✅ 已保存 {img_path}") + + except Exception as e: + error(f"保存第 {i} 页失败: {e}") + continue + + debug(f"渲染完成: 成功保存{len(pages)} 页") + return img_paths + + except Exception as e: + error(f"渲染PDF失败: {e}") + return [] + + +def extract_images_from_word(doc_path, base_output_dir) -> List[str]: + """ + 从Word文档中提取所有图像 + + 参数: + doc_path (str): Word文档路径(.docx格式) + base_output_dir (str): 基础输出目录,会在此目录下创建以文档名命名的子文件夹 + + 返回: + int: 成功提取的图像数量 + """ + # 检查文件是否为.docx格式 + if not doc_path.lower().endswith('.docx'): + error(f"仅支持.docx格式的Word文档: {doc_path}") + return [] + + # 从文档路径提取文件名(不含扩展名) + doc_filename = safe_filename(Path(doc_path).stem) + + # 创建以文档名命名的子文件夹 + image_output_dir = os.path.join(base_output_dir, doc_filename) + + # 创建输出目录(如果不存在) + if not os.path.exists(image_output_dir): + os.makedirs(image_output_dir, exist_ok=True) + debug(f"创建输出目录: {image_output_dir}") + + try: + # 检查文档是否存在 + if not os.path.exists(doc_path): + error(f"Word文档不存在: {doc_path}") + return [] + + debug(f"开始从Word文档提取图像: {doc_path}") + debug(f"输出目录: {image_output_dir}") + + # 将.docx文件视为zip文件处理 + with zipfile.ZipFile(doc_path, 'r') as docx: + # 获取所有文件列表 + file_list = docx.namelist() + + # 筛选出图像文件(通常位于word/media/目录下) + image_files = [f for f in file_list if f.startswith('word/media/') and not f.endswith('/') and os.path.basename(f)] + + debug(f"找到 {len(image_files)} 个图像文件") + + img_paths = [] + for i, image_path in enumerate(image_files): + try: + # 提取图像文件名 + image_name = os.path.basename(image_path) + + # 确保文件名有效 + if not image_name or image_name == "media": + # 从路径中提取有意义的文件名 + parts = image_path.split('/') + for part in reversed(parts): + if part and part != "media": + image_name = part + break + else: + image_name = f"image_{i + 1}.png" + + # 添加文件扩展名如果缺失 + if not Path(image_name).suffix: + # 尝试从文件内容检测格式,否则使用默认png + image_name += ".png" + + # 生成输出文件路径 + output_path = os.path.join(image_output_dir, f"image_{i + 1:03d}_{image_name}") + img_paths.append(output_path) + # 提取并保存图像 + with docx.open(image_path) as image_file: + image_data = image_file.read() + + # 保存图像数据 + with open(output_path, 'wb') as f: + f.write(image_data) + + debug(f"✅ 已提取图像: {output_path}") + + except Exception as e: + error(f"提取图像 {image_path} 失败: {e}") + continue + + debug(f"Word文档图像提取完成: 成功提取 {len(image_files)} 个图像") + return img_paths + + except Exception as e: + error(f"提取Word文档图像失败: {e}") + return [] + + +def 
extract_images_from_ppt(ppt_path, base_output_dir) -> List[str]: + """ + 从PowerPoint演示文稿中提取所有图像 + + 参数: + ppt_path (str): PowerPoint文件路径(.pptx格式) + base_output_dir (str): 基础输出目录,会在此目录下创建以PPT名命名的子文件夹 + + 返回: + int: 成功提取的图像数量 + """ + # 检查文件是否为.pptx格式 + if not ppt_path.lower().endswith('.pptx'): + error(f"仅支持.pptx格式的PowerPoint文档: {ppt_path}") + return [] + + # 从PPT路径提取文件名(不含扩展名) + ppt_filename = safe_filename(Path(ppt_path).stem) + + # 创建以PPT名命名的子文件夹 + image_output_dir = os.path.join(base_output_dir, ppt_filename) + + # 创建输出目录(如果不存在) + if not os.path.exists(image_output_dir): + os.makedirs(image_output_dir, exist_ok=True) + debug(f"创建输出目录: {image_output_dir}") + + try: + # 检查PPT文件是否存在 + if not os.path.exists(ppt_path): + error(f"PowerPoint文档不存在: {ppt_path}") + return [] + + debug(f"开始从PowerPoint文档提取图像: {ppt_path}") + debug(f"输出目录: {image_output_dir}") + + # 将.pptx文件视为zip文件处理 + with zipfile.ZipFile(ppt_path, 'r') as pptx: + # 获取所有文件列表 + file_list = pptx.namelist() + + # 筛选出图像文件(通常位于ppt/media/目录下) + image_files = [f for f in file_list if f.startswith('ppt/media/') and not f.endswith('/') and os.path.basename(f)] + + debug(f"找到 {len(image_files)} 个图像文件") + + img_paths = [] + for i, image_path in enumerate(image_files): + try: + # 提取图像文件名 + image_name = Path(image_path).name + + # 验证文件名有效性 + if not image_name or image_name == "media": + parts = image_path.split('/') + for part in reversed(parts): + if part and part != "media": + image_name = part + break + else: + image_name = f"image_{i + 1}.png" + + # 确保有文件扩展名 + if not Path(image_name).suffix: + image_name += ".png" + + # 生成输出文件路径 + output_path = os.path.join(image_output_dir, f"image_{i + 1:03d}_{image_name}") + img_paths.append(output_path) + # 提取并保存图像 + with pptx.open(image_path) as image_file: + image_data = image_file.read() + + # 保存图像数据 + with open(output_path, 'wb') as f: + f.write(image_data) + + debug(f"✅ 已提取图像: {output_path}") + + except Exception as e: + error(f"提取图像 {image_path} 失败: {e}") + continue + + debug(f"PowerPoint文档图像提取完成: 成功提取{len(image_files)} 个图像") + return img_paths + + except Exception as e: + error(f"提取PowerPoint文档图像失败: {e}") + return [] + + +def extract_images_from_file(file_path, base_output_dir="/home/wangmeihua/kyrag/data/extracted_images", file_type=None): + """ + 通用函数:根据文件类型自动选择提取方法 + + 参数: + file_path (str): 文件路径 + base_output_dir (str): 基础输出目录 + file_type (str): 文件类型(可选,自动检测) + + 返回: + int: 成功提取的图像/页面数量 + """ + # 如果没有指定文件类型,根据扩展名自动检测 + if file_type is None: + ext = Path(file_path).suffix.lower() + if ext == '.pdf': + file_type = 'pdf' + elif ext == '.docx': + file_type = 'word' + elif ext == '.pptx': + file_type = 'ppt' + else: + error(f"不支持的文件类型: {ext}") + return [] + + # 根据文件类型调用相应的函数 + if file_type == 'pdf': + return render_pdf_to_images(file_path, base_output_dir) + elif file_type == 'word': + return extract_images_from_word(file_path, base_output_dir) + elif file_type == 'ppt': + return extract_images_from_ppt(file_path, base_output_dir) + else: + error(f"不支持的文件类型: {file_type}") + return [] + + +# 使用示例 +if __name__ == "__main__": + base_output_dir = "/home/wangmeihua/kyrag/data/extracted_images" + + # PDF文件处理 + pdf_path = "/home/wangmeihua/kyrag/22-zh-review.pdf" + pdf_imgs = extract_images_from_file(pdf_path, base_output_dir, 'pdf') + debug(f"pdf_imgs: {pdf_imgs}") + if len(pdf_imgs) > 0: + debug(f"成功处理PDF: {len(pdf_imgs)} 页") + else: + error("PDF处理失败") + + # Word文档处理 + doc_path = "/home/wangmeihua/kyrag/test.docx" + if os.path.exists(doc_path): + doc_imgs = extract_images_from_file(doc_path, 
base_output_dir, 'word') + debug(f"doc_imgs: {doc_imgs}") + if len(doc_imgs) > 0: + debug(f"成功处理Word文档: {len(doc_imgs)} 个图像") + else: + error("Word文档处理失败") + else: + debug(f"Word文档不存在: {doc_path}") + + # PowerPoint处理 + ppt_path = "/home/wangmeihua/kyrag/提示学习-王美华.pptx" + if os.path.exists(ppt_path): + ppt_imgs = extract_images_from_file(ppt_path, base_output_dir, 'ppt') + if len(ppt_imgs) > 0: + debug(f"成功处理PowerPoint: {len(ppt_imgs)} 个图像") + else: + error("PowerPoint处理失败") + else: + debug(f"PowerPoint文档不存在: {ppt_path}") \ No newline at end of file
diff --git a/rag/transaction_manager.py b/rag/transaction_manager.py index 6303549..92d1294 100644 --- a/rag/transaction_manager.py +++ b/rag/transaction_manager.py
@@ -20,7 +20,6 @@ class OperationType(Enum): VECTOR_SEARCH = "vector_search" RERANK = "rerank" - @dataclass class RollbackOperation: """回滚操作记录"""
From b2088aec49f7a5d8c35990feac8c238369d93899 Mon Sep 17 00:00:00 2001 From: wangmeihua <13383952685@163.com> Date: Fri, 28 Nov 2025 16:20:20 +0800 Subject: [PATCH 6/6] rag --- models/embeddingmode.xlsx | Bin 15810 -> 15866 bytes rag/rag_operations.py | 316 ++++++++++++++++++++++++++++++------ rag/ragapi.py | 246 +++++++++++++++++++++++++---- rag/service_opts.py | 2 +- wwwroot/test.ui | 5 + wwwroot/test_query.dspy | 7 +- 6 files changed, 488 insertions(+), 88 deletions(-)
diff --git a/models/embeddingmode.xlsx b/models/embeddingmode.xlsx index 55a8a769113707df5d4783f488a170a9073bb456..af4ba5d97e3dacdfb1cc288564aa72fb701815b8 100644
GIT binary patch delta 9648 [base85-encoded binary data omitted]
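Before the rag/rag_operations.py changes below, a short standalone sketch of how the image paths returned by extract_images_from_file() (PATCH 5/6) are combined with the text chunks into the mixed input list that file_uploaded() (PATCH 4/6) hands to generate_multi_embeddings(). to_data_uri and build_multimodal_inputs are illustrative helpers under assumed inputs, not functions from the repository.

import base64
from pathlib import Path

# Extensions recognised by file_uploaded(); anything else falls back to JPEG.
MIME_BY_EXT = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".webp": "image/webp",
    ".bmp": "image/bmp",
}

def to_data_uri(img_path: str) -> str:
    # Base64-encode one extracted image as a data URI, mirroring file_to_base64().
    ext = Path(img_path).suffix.lower()
    mime = MIME_BY_EXT.get(ext, "image/jpeg")
    b64 = base64.b64encode(Path(img_path).read_bytes()).decode("utf-8")
    return f"data:{mime};base64,{b64}"

def build_multimodal_inputs(chunks, image_paths):
    # Text chunks first, then images, matching the order used before
    # generate_multi_embeddings() is called when embedding_mode == 1.
    inputs = [{"type": "text", "content": c} for c in chunks]
    inputs += [{"type": "image", "data": to_data_uri(p)} for p in image_paths]
    return inputs

if __name__ == "__main__":
    # No images on disk in this example, so only the two text entries are printed.
    print(build_multimodal_inputs(["第一段文本", "第二段文本"], []))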
z;2AN7xOSE+1`bA<7o5Y35BBC~gbUz;f mj5t6GIn(m-)f)fA0-}T?8~?sA0ik zu=wCL0Y*fBxWDl5(%piyXGtLbb5afZm;s!elGL!sg$BGOE_2)$W&jSrb`BIjE z8wFpJ{BLCbe{yT^ouDAJ9hh5)7dj4XCnN^*KnpGx5`xYG9|{S!5UpfGT?FcazjoqhhV{1e7s5_A!K(y9NPTm3KS z8S4c^4AvFFBK_~=_W$1V<`MsCjJk86N 0: + # 或者保留更多有用字符 + text = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9\s.;,\n/]', '', text) + else: + error(f"文件 {realpath} 无法提取任何文本内容") + text = "" # 确保为空字符串 + timings["load_file"] = time.time() - start_load debug(f"加载文件耗时: {timings['load_file']:.2f} 秒, 文本长度: {len(text)}") - if not text or not text.strip(): - raise ValueError(f"文件 {realpath} 加载为空") + # # 加载文件内容 + # text = fileloader(realpath) + # debug(f"pdf原生提取结果是:{text}") + # if len(text) == 0: + # debug(f"pdf原生提取失败,尝试扫描件提取") + # text = self.pdf_to_text(realpath) + # debug(f"pdf扫描件抽取的文本内容是:{text}") + # text = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9\s.;,\n/]', '', text) + # timings["load_file"] = time.time() - start_load + # debug(f"加载文件耗时: {timings['load_file']:.2f} 秒, 文本长度: {len(text)}") + # + # if not text or not text.strip(): + # raise ValueError(f"文件 {realpath} 加载为空") # 分片处理 document = Document(page_content=text) @@ -67,6 +96,40 @@ class RagOperations: return chunks + def pdf_to_text( + self, + pdf_path: str, + output_txt: Optional[str] = None, + dpi: int = 300, + lang: str = 'chi_sim+chi_tra+eng' + ) -> str: + """ + 将扫描版 PDF 转为文字(你原来的代码,一行调用版) + + 参数: + pdf_path: PDF 文件路径(字符串) + output_txt: 如果提供,会自动保存到这个 txt 文件(可选) + dpi: 图片分辨率,默认 300(越高越清晰) + lang: 语言包,默认中文简体+繁体+英文 + + 返回: + 提取出的完整文字(字符串) + """ + # PDF 转图片 + images = convert_from_path(pdf_path, dpi=dpi) + + # OCR 识别 + text = '' + for img in images: + text += pytesseract.image_to_string(img, lang=lang) + '\n' + + # 可选:自动保存到文件 + if output_txt: + with open(output_txt, 'w', encoding='utf-8') as f: + f.write(text) + + return text + async def generate_embeddings(self, request, chunks: List[Document], service_params: Dict, userid: str, timings: Dict, transaction_mgr: TransactionManager = None) -> List[List[float]]: @@ -149,6 +212,14 @@ class RagOperations: return result + # async def force_l2_normalize(self, vector: List[float]) -> List[float]: + # """万无一失的 L2 归一化""" + # arr = np.array(vector, dtype=np.float32) + # norm = np.linalg.norm(arr) + # if norm == 0: + # return vector # 全零向量无法归一化 + # return (arr / norm).tolist() + # 统一插入向量库 async def insert_all_vectors( self, @@ -200,7 +271,10 @@ class RagOperations: # 遍历 multi_results for raw_key, info in multi_results.items(): typ = info["type"] - + # vector = info["vector"] + # debug(f"从后端传回来的向量数据是:{vector}") + # emb = await self.force_l2_normalize(info["vector"]) + # debug(f"归一化后的向量数据是:{emb}") # --- 文本 --- if typ == "text": # raw_key 就是原文 @@ -253,7 +327,7 @@ class RagOperations: # "upload_time": upload_time, # "file_type": "face", # }) - # continue + continue # --- 视频 --- if typ == "video": @@ -289,7 +363,7 @@ class RagOperations: # "upload_time": upload_time, # "file_type": "face", # }) - # continue + continue # --- 音频 --- if typ == "audio": @@ -776,25 +850,150 @@ class RagOperations: debug(f"三元组匹配总耗时: {timings['triplet_matching']:.3f} 秒") return all_triplets - async def generate_query_vector(self, request, text: str, service_params: Dict, - userid: str, timings: Dict) -> List[float]: - """生成查询向量""" - debug(f"生成查询向量: {text[:200]}...") + async def generate_query_vector( + self, + request, + text: str, + service_params: Dict, + userid: str, + timings: Dict, + embedding_mode: int = 0 + ) -> List[float]: + """生成查询向量(支持文本/多模态)""" + debug(f"生成查询向量: mode={embedding_mode}, text='{text[:100]}...'") start_vector = 
time.time() - query_vector = await self.api_service.get_embeddings( - request=request, - texts=[text], - upappid=service_params['embedding'], - apiname="BAAI/bge-m3", - user=userid - ) - if not query_vector or not all(len(vec) == 1024 for vec in query_vector): - raise ValueError("查询向量必须是长度为 1024 的浮点数列表") - query_vector = query_vector[0] + + if embedding_mode == 0: + # === 模式 0:纯文本嵌入(BAAI/bge-m3)=== + debug("使用 BAAI/bge-m3 文本嵌入") + vectors = await self.api_service.get_embeddings( + request=request, + texts=[text], + upappid=service_params['embedding'], + apiname="BAAI/bge-m3", + user=userid + ) + if not vectors or not isinstance(vectors, list) or len(vectors) == 0: + raise ValueError("bge-m3 返回空结果") + query_vector = vectors[0] + if len(query_vector) != 1024: + raise ValueError(f"bge-m3 返回向量维度错误: {len(query_vector)}") + + elif embedding_mode == 1: + # === 模式 1:多模态嵌入(black/clip)=== + debug("使用 black/clip 多模态嵌入") + inputs = [{"type": "text", "content": text}] + + result = await self.api_service.get_multi_embeddings( + request=request, + inputs=inputs, + upappid=service_params['embedding'], + apiname="black/clip", + user=userid + ) + + query_vector = None + for key, info in result.items(): + if info.get("type") == "error": + debug(f"CLIP 返回错误跳过: {info['error']}") + continue + if "vector" in info and isinstance(info["vector"], list) and len(info["vector"]) == 1024: + query_vector = info["vector"] + debug(f"成功获取 CLIP 向量(来自 {info['type']})") + break + + if query_vector is None: + raise ValueError("black/clip 未返回任何有效 1024 维向量") + + else: + raise ValueError(f"不支持的 embedding_mode: {embedding_mode}") + + # 最终统一校验 + if not isinstance(query_vector, list) or len(query_vector) != 1024: + raise ValueError(f"查询向量必须是长度为 1024 的浮点数列表,实际: {len(query_vector)}") + timings["vector_generation"] = time.time() - start_vector - debug(f"生成查询向量耗时: {timings['vector_generation']:.3f} 秒") + debug(f"生成查询向量成功,耗时: {timings['vector_generation']:.3f} 秒,模式: {embedding_mode}") return query_vector + async def generate_image_vector( + self, + request, + img_path: str, + service_params: Dict, + userid: str, + timings: Dict, + embedding_mode: int = 0 + ) -> List[float]: + """生成查询向量(支持文本/多模态)""" + debug(f"生成查询向量: mode={embedding_mode}, image={img_path}") + start_vector = time.time() + + if embedding_mode == 0: + raise ValueError(f"纯文本没有这个功能,请重新选择服务") + + elif embedding_mode == 1: + # === 模式 1:多模态嵌入(black/clip)=== + debug("使用 black/clip 多模态嵌入") + inputs = [] + try: + ext = Path(img_path).suffix.lower() + if ext not in {".png", ".jpg", ".jpeg", ".webp", ".bmp"}: + ext = ".jpg" + + mime_map = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".webp": "image/webp", + ".bmp": "image/bmp" + } + mime_type = mime_map.get(ext, "image/jpeg") + with open(img_path, "rb") as f: + b64 = base64.b64encode(f.read()).decode("utf-8") + data_uri = f"data:{mime_type};base64,{b64}" + + inputs.append({ + "type": "image", + "data": data_uri + }) + debug(f"已添加图像({mime_type}, {len(b64) / 1024:.1f}KB): {Path(img_path).name}") + + except Exception as e: + debug(f"图像处理失败,跳过: {img_path} → {e}") + + result = await self.api_service.get_multi_embeddings( + request=request, + inputs=inputs, + upappid=service_params['embedding'], + apiname="black/clip", + user=userid + ) + + image_vector = None + for key, info in result.items(): + if info.get("type") == "error": + debug(f"CLIP 返回错误跳过: {info['error']}") + continue + if "vector" in info and isinstance(info["vector"], list) and len(info["vector"]) == 1024: + image_vector = info["vector"] + 
debug(f"成功获取 CLIP 向量(来自 {info['type']})") + break + + if image_vector is None: + raise ValueError("black/clip 未返回任何有效 1024 维向量") + + else: + raise ValueError(f"不支持的 embedding_mode: {embedding_mode}") + + # 最终统一校验 + if not isinstance(image_vector, list) or len(image_vector) != 1024: + raise ValueError(f"查询向量必须是长度为 1024 的浮点数列表,实际: {len(image_vector)}") + + timings["vector_generation"] = time.time() - start_vector + debug(f"生成查询向量成功,耗时: {timings['vector_generation']:.3f} 秒,模式: {embedding_mode}") + return image_vector + async def vector_search(self, request, query_vector: List[float], orgid: str, fiids: List[str], limit: int, service_params: Dict, userid: str, timings: Dict) -> List[Dict]: @@ -866,34 +1065,49 @@ class RagOperations: return unique_triples def format_search_results(self, results: List[Dict], limit: int) -> List[Dict]: - """格式化搜索结果为统一格式""" - formatted_results = [] - # for res in results[:limit]: - # score = res.get('rerank_score', res.get('distance', 0)) - # - # content = res.get('text', '') - # title = res.get('metadata', {}).get('filename', 'Untitled') - # document_id = res.get('metadata', {}).get('document_id', '') - # - # formatted_results.append({ - # "content": content, - # "title": title, - # "metadata": {"document_id": document_id, "score": score}, - # }) - #得分归一化 + formatted = [] for res in results[:limit]: - rerank_score = res.get('rerank_score', 0) - score = 1 / (1 + math.exp(-rerank_score)) if rerank_score is not None else 1 - res.get('distance', 0) - score = max(0.0, min(1.0, score)) - - content = res.get('text', '') - title = res.get('metadata', {}).get('filename', 'Untitled') - document_id = res.get('metadata', {}).get('document_id', '') - - formatted_results.append({ - "content": content, - "title": title, - "metadata": {"document_id": document_id, "score": score}, + # # 优先 rerank,其次用向量相似度(直接用,不要反) + # if res.get('rerank_score') is not None: + # score = res.get('rerank_score') + # else: + # score = res.get('distance', 0.0) + distance = res.get('distance', 0.0) + rerank_score = res.get('rerank_score', 0.0) + formatted.append({ + "content": res.get('text', ''), + "title": res.get('metadata', {}).get('filename', 'Untitled'), + "metadata": { + "document_id": res.get('metadata', {}).get('document_id', ''), + "distance": distance, + "rerank_score": rerank_score, + } }) + return formatted - return formatted_results \ No newline at end of file + # async def save_uploaded_photo(self, image_file: FileStorage, orgid: str) -> str: + # """ + # 把前端上传的图片保存到 /home/wangmeihua/kyrag/data/photo 目录下 + # 返回保存后的绝对路径(字符串),供 generate_img_vector 使用 + # """ + # if not image_file or not hasattr(image_file, "filename"): + # raise ValueError("无效的图片上传对象") + # + # # 为了安全,按 orgid 分目录存放(避免不同公司文件混在一起) + # org_dir = UPLOAD_PHOTO_DIR / orgid + # org_dir.mkdir(parents=True, exist_ok=True) + # + # # 生成唯一文件名,保留原始后缀 + # suffix = Path(image_file.filename).suffix.lower() + # if not suffix or suffix not in {".jpg", ".jpeg", ".png", ".webp", ".bmp", ".gif"}: + # suffix = ".jpg" + # + # unique_name = f"{uuid.uuid4().hex}{suffix}" + # save_path = org_dir / unique_name + # + # # 真正落盘 + # image_file.save(str(save_path)) + # debug(f"图片已保存: {save_path} (原始名: {image_file.filename})") + # + # # 返回字符串路径,generate_img_vector 直接收 str 就行 + # return str(save_path) \ No newline at end of file diff --git a/rag/ragapi.py b/rag/ragapi.py index 355c20f..96b783c 100644 --- a/rag/ragapi.py +++ b/rag/ragapi.py @@ -6,10 +6,13 @@ import traceback import json import math import uuid -from rag.service_opts import get_service_params, 
sor_get_service_params +import os +from rag.service_opts import get_service_params, sor_get_service_params, sor_get_embedding_mode, get_embedding_mode from rag.rag_operations import RagOperations from langchain_core.documents import Document +REAL_PHOTO_ROOT = "/home/wangmeihua/kyrag/files" + helptext = """kyrag API: 1. 得到kdb表: @@ -134,7 +137,16 @@ async def fusedsearch(request, params_kw, *params): debug(f"params_kw: {params_kw}") # orgid = "04J6VbxLqB_9RPMcgOv_8" # userid = "04J6VbxLqB_9RPMcgOv_8" - query = params_kw.get('query', '') + query = params_kw.get('query', '').strip() + img_path = params_kw.get('image') + if isinstance(img_path, str): + img_path = img_path.strip() + relative_part = img_path.lstrip("/") + real_img_path = os.path.join(REAL_PHOTO_ROOT, relative_part) + if not os.path.exists(real_img_path): + raise FileNotFoundError(f"图片不存在: {real_img_path}") + img_path = real_img_path + debug(f"自动修复图片路径成功: {img_path}") # 统一模式处理 limit 参数,为了对接dify和coze raw_limit = params_kw.get('limit') or ( params_kw.get('retrieval_setting', {}).get('top_k') @@ -189,43 +201,211 @@ async def fusedsearch(request, params_kw, *params): service_params = await get_service_params(orgid) if not service_params: raise ValueError("无法获取服务参数") + # 获取嵌入模式 + embedding_mode = await get_embedding_mode(orgid) + debug(f"检测到 embedding_mode = {embedding_mode}(0=文本, 1=多模态)") - try: - timings = {} - start_time = time.time() - rag_ops = RagOperations() + # 情况1:query 和 image 都为空 → 报错 + if not query and not img_path: + raise ValueError("查询文本和图片不能同时为空") - query_entities = await rag_ops.extract_entities(request, query, service_params, userid, timings) - all_triplets = await rag_ops.match_triplets(request, query, query_entities, orgid, fiids, service_params, - userid, timings) - combined_text = _combine_query_with_triplets(query, all_triplets) - query_vector = await rag_ops.generate_query_vector(request, combined_text, service_params, userid, timings) - search_results = await rag_ops.vector_search(request, query_vector, orgid, fiids, limit + 5, service_params, - userid, timings) + # 情况2:query 和 image 都存在 → 报错(你当前业务不允许同时传) + if query and img_path: + raise ValueError("查询文本和图片只能二选一,不能同时提交") - use_rerank = True - if use_rerank and search_results: - final_results = await rag_ops.rerank_results(request, combined_text, search_results, limit, service_params, + # 3. 
只有图片 → 以图搜图 走纯多模态分支 + if img_path and not query: + try: + debug("检测到纯图片查询,执行以图搜图") + rag_ops = RagOperations() + + timings = {} + start_time = time.time() + + # 直接生成图片向量 + img_vector = await rag_ops.generate_image_vector( + request, img_path, service_params, userid, timings, embedding_mode + ) + + # 向量搜索(多取 50 条再截断,和文本分支保持一致) + search_results = await rag_ops.vector_search( + request, img_vector, orgid, fiids, limit + 50, service_params, userid, timings + ) + + timings["total_time"] = time.time() - start_time + + # 可选:搜索完后删除图片,省磁盘(看你需求) + # try: + # os.remove(img_path) + # except: + # pass + + final_results = [] + for item in search_results[:limit]: + final_results.append({ + "text": item["text"], + "distance": item["distance"] + }) + + return { + "results": final_results, + "timings": timings + } + except Exception as e: + error(f"融合搜索失败: {str(e)}, 堆栈: {traceback.format_exc()}") + return { + "records": [], + "timings": {"total_time": time.time() - start_time if 'start_time' in locals() else 0}, + "error": str(e) + } + + if not img_path and query: + try: + timings = {} + start_time = time.time() + rag_ops = RagOperations() + + query_entities = await rag_ops.extract_entities(request, query, service_params, userid, timings) + all_triplets = await rag_ops.match_triplets(request, query, query_entities, orgid, fiids, service_params, + userid, timings) + combined_text = _combine_query_with_triplets(query, all_triplets) + query_vector = await rag_ops.generate_query_vector(request, combined_text, service_params, userid, timings, embedding_mode) + search_results = await rag_ops.vector_search(request, query_vector, orgid, fiids, limit + 50, service_params, userid, timings) - debug(f"final_results: {final_results}") - else: - final_results = [{k: v for k, v in r.items() if k != 'rerank_score'} for r in search_results] - formatted_results = rag_ops.format_search_results(final_results, limit) - timings["total_time"] = time.time() - start_time - info(f"融合搜索完成,返回 {len(formatted_results)} 条结果,总耗时: {timings['total_time']:.3f} 秒") + use_rerank = True + if use_rerank and search_results: + final_results = await rag_ops.rerank_results(request, combined_text, search_results, limit, service_params, + userid, timings) + debug(f"final_results: {final_results}") + else: + final_results = [{k: v for k, v in r.items() if k != 'rerank_score'} for r in search_results] - return { - "records": formatted_results, - "timings": timings - } - except Exception as e: - error(f"融合搜索失败: {str(e)}, 堆栈: {traceback.format_exc()}") - return { - "records": [], - "timings": {"total_time": time.time() - start_time if 'start_time' in locals() else 0}, - "error": str(e) - } + formatted_results = rag_ops.format_search_results(final_results, limit) + timings["total_time"] = time.time() - start_time + debug(f"融合搜索完成,返回 {len(formatted_results)} 条结果,总耗时: {timings['total_time']:.3f} 秒") + + return { + "records": formatted_results, + "timings": timings + } + except Exception as e: + error(f"融合搜索失败: {str(e)}, 堆栈: {traceback.format_exc()}") + return { + "records": [], + "timings": {"total_time": time.time() - start_time if 'start_time' in locals() else 0}, + "error": str(e) + } + +# async def fusedsearch(request, params_kw, *params): +# """ +# 融合搜索,调用服务化端点 +# +# """ +# kw = request._run_ns +# f = kw.get('get_userorgid') +# orgid = await f() +# debug(f"orgid: {orgid},{f=}") +# f = kw.get('get_user') +# userid = await f() +# debug(f"params_kw: {params_kw}") +# # orgid = "04J6VbxLqB_9RPMcgOv_8" +# # userid = "04J6VbxLqB_9RPMcgOv_8" +# query = 
params_kw.get('query', '') +# # 统一模式处理 limit 参数,为了对接dify和coze +# raw_limit = params_kw.get('limit') or ( +# params_kw.get('retrieval_setting', {}).get('top_k') +# if isinstance(params_kw.get('retrieval_setting'), dict) +# else None +# ) +# +# # 标准化为整数值 +# if raw_limit is None: +# limit = 5 # 两个来源都不存在时使用默认值 +# elif isinstance(raw_limit, (int, float)): +# limit = int(raw_limit) # 数值类型直接转换 +# elif isinstance(raw_limit, str): +# try: +# # 字符串转换为整数 +# limit = int(raw_limit) +# except (TypeError, ValueError): +# limit = 5 # 转换失败使用默认值 +# else: +# limit = 5 # 其他意外类型使用默认值 +# debug(f"limit: {limit}") +# raw_fiids = params_kw.get('fiids') or params_kw.get('knowledge_id') # +# +# # 标准化为列表格式 +# if raw_fiids is None: +# fiids = [] # 两个参数都不存在 +# elif isinstance(raw_fiids, list): +# fiids = [str(item).strip() for item in raw_fiids] # 已经是列表 +# elif isinstance(raw_fiids, str): +# # fiids = [f.strip() for f in raw_fiids.split(',') if f.strip()] +# try: +# # 尝试解析 JSON 字符串 +# parsed = json.loads(raw_fiids) +# if isinstance(parsed, list): +# fiids = [str(item).strip() for item in parsed] # JSON 数组转为字符串列表 +# else: +# # 处理逗号分隔的字符串或单个 ID 字符串 +# fiids = [f.strip() for f in raw_fiids.split(',') if f.strip()] +# except json.JSONDecodeError: +# # 如果不是合法 JSON,按逗号分隔 +# fiids = [f.strip() for f in raw_fiids.split(',') if f.strip()] +# elif isinstance(raw_fiids, (int, float)): +# fiids = [str(int(raw_fiids))] # 数值类型转为字符串列表 +# else: +# fiids = [] # 其他意外类型 +# +# debug(f"fiids: {fiids}") +# +# # 验证 fiids的orgid与orgid = await f()是否一致 +# await _validate_fiids_orgid(fiids, orgid, kw) +# +# service_params = await get_service_params(orgid) +# if not service_params: +# raise ValueError("无法获取服务参数") +# # 获取嵌入模式 +# embedding_mode = await get_embedding_mode(orgid) +# debug(f"检测到 embedding_mode = {embedding_mode}(0=文本, 1=多模态)") +# +# try: +# timings = {} +# start_time = time.time() +# rag_ops = RagOperations() +# +# query_entities = await rag_ops.extract_entities(request, query, service_params, userid, timings) +# all_triplets = await rag_ops.match_triplets(request, query, query_entities, orgid, fiids, service_params, +# userid, timings) +# combined_text = _combine_query_with_triplets(query, all_triplets) +# query_vector = await rag_ops.generate_query_vector(request, combined_text, service_params, userid, timings, embedding_mode) +# search_results = await rag_ops.vector_search(request, query_vector, orgid, fiids, limit + 50, service_params, +# userid, timings) +# +# use_rerank = False +# if use_rerank and search_results: +# final_results = await rag_ops.rerank_results(request, combined_text, search_results, limit, service_params, +# userid, timings) +# debug(f"final_results: {final_results}") +# else: +# final_results = [{k: v for k, v in r.items() if k != 'rerank_score'} for r in search_results] +# +# formatted_results = rag_ops.format_search_results(final_results, limit) +# timings["total_time"] = time.time() - start_time +# debug(f"融合搜索完成,返回 {len(formatted_results)} 条结果,总耗时: {timings['total_time']:.3f} 秒") +# +# return { +# "records": formatted_results, +# "timings": timings +# } +# except Exception as e: +# error(f"融合搜索失败: {str(e)}, 堆栈: {traceback.format_exc()}") +# return { +# "records": [], +# "timings": {"total_time": time.time() - start_time if 'start_time' in locals() else 0}, +# "error": str(e) +# } # async def text_insert(text: str, fiid: str, orgid: str, db_type: str): async def textinsert(request, params_kw, *params): diff --git a/rag/service_opts.py b/rag/service_opts.py index 9cb7248..559ade5 100644 --- 
a/rag/service_opts.py +++ b/rag/service_opts.py @@ -94,7 +94,7 @@ async def sor_get_embedding_mode(sor, orgid) -> int: async def get_embedding_mode(orgid): db = DBPools() - debug(f"传入的orgid是:{orgid}") + # debug(f"传入的orgid是:{orgid}") dbname = get_serverenv('get_module_dbname')('rag') async with db.sqlorContext(dbname) as sor: return await sor_get_embedding_mode(sor, orgid) diff --git a/wwwroot/test.ui b/wwwroot/test.ui index 622ccf2..1a5caa5 100644 --- a/wwwroot/test.ui +++ b/wwwroot/test.ui @@ -18,6 +18,11 @@ "editable": true, "rows": 5 }, + { + "uitype": "image", + "name": "image", + "label": "上传查询图片(可选)" + }, { "name": "fiids", "uitype": "checkbox", diff --git a/wwwroot/test_query.dspy b/wwwroot/test_query.dspy index 381290a..6e49ffa 100644 --- a/wwwroot/test_query.dspy +++ b/wwwroot/test_query.dspy @@ -7,14 +7,15 @@ if not orgid: message='请先登录' ) -fiids = params_kw.fiids query = params_kw.query +image = params_kw.image +fiids = params_kw.fiids limit = params_kw.limit -if not query or not fiids or not limit: +if (not query and not image) or not fiids or not limit: return UiError( title='无效输入', - message='请输入查询文本并选择至少一个知识库' + message='请输入查询文本或上传image并选择至少一个知识库和填写返回条数' ) try: