From bd9d37749d299c1f14e27ba608c193888f0d6fc0 Mon Sep 17 00:00:00 2001 From: bg2bkk Date: Wed, 23 Mar 2016 21:18:30 +0800 Subject: [PATCH 1/4] createDict and queryDict via Ternary search tree --- .gitignore | 3 + MyDict.c | 206 +++++++++++++++++++++++++++++++++++++++++++------ makefile | 9 +-- raw-dict_debug | 60 ++++++++++++++ 4 files changed, 249 insertions(+), 29 deletions(-) create mode 100644 .gitignore create mode 100644 raw-dict_debug diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8c5fd4d --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*~ +*.o + diff --git a/MyDict.c b/MyDict.c index f220eae..e93494c 100644 --- a/MyDict.c +++ b/MyDict.c @@ -6,6 +6,9 @@ * use the ascii set */ #define R 128 +//#define DEBUG + +#define TERN /* * Node definition of the diction tree @@ -15,6 +18,12 @@ typedef struct TrieNode { char *inter;//save the word's interpretation } TrieNode; +typedef struct TernNode { + struct TernNode *child, *lchild, * rchild; + char key; + void *data; +} TernNode; + /* * Insert a word and it's interpretation into the diction tree。 * root初始指向树根,word是要插入的单词,inter指向单词对应的意思。 @@ -174,29 +183,180 @@ TestAndTolower(char *word) { } return 1; } + + +TernNode *newNode(char val){ + TernNode *node = (TernNode *)calloc(1, sizeof(TernNode)); + if (!node) { + fprintf(stderr, "ERROR: calloc new TernNode \n"); + exit(1); + } + node->child = node->lchild = node->rchild = NULL; + node->data = NULL; + node->key = val; + return node; +} + + +void insertDict(TernNode **root, char *word, char *data){ + + if(!word || !data){ + fprintf(stderr, "ERROR: word or data == NULL in %s\n", __FUNCTION__); + exit(1); + } + + TernNode *node = *root; + int len = strlen(word); + + if(node == NULL) { + + *root = node = newNode(*word); + + if(len == 1) { + len = strlen(data) + 1; + node->data = malloc(len); + memcpy(node->data, data, len); + + } else if(len > 1){ + + return insertDict(&(node->child), word+1, data); + + } else { + + fprintf(stderr, "ERROR: word parsing error in %s\n", __FUNCTION__); + exit(1); + } + } else { + + if (node->key == *word){ + + return insertDict(&(node->child), word+1, data); + + } else if (node->key > *word) { + + return insertDict(&(node->lchild), word, data); + + } else if (node->key < *word) { + + return insertDict(&(node->rchild), word, data); + + } + } +} + +TernNode *createDict() { + FILE *fp = NULL; + char word[300], inter[300]; + size_t wordNumber = 0; + /* + * 打开同一个目录下的原始文件。 + */ +#ifdef DEBUG + fp = fopen("raw-dict_debug", "r"); +#else + fp = fopen("raw-dict", "r"); +#endif + if (!fp) { + fprintf(stderr, "FATAL ERROR: raw-dict not exist\n"); + exit(1); + } + + TernNode *root = NULL; + /* + * 读取原始文件,单词放到word数组中,对应的中文意思放到inter数组中。 + */ + while (fgets(word, sizeof(word), fp) && fgets(inter, sizeof(word), fp)) { + + /* + * 插入到字典中。 + */ + word[strlen(word) - 1] = '\0'; + inter[strlen(inter) - 1] = '\0'; +#ifdef DEBUG + printf("%s %s\n", word, inter); +#endif + wordNumber++; + insertDict(&root, word, inter); + } + fclose(fp); + printf("*****Total number of words is %u.*****\n", wordNumber); + return root; + +} + +TernNode *queryDict(TernNode *root, char *word) { + if(!root || !word){ + return NULL; + } + + if(root->key == *word){ + int len = strlen(word); + if(len == 1) + return root; + else + return queryDict(root->child, word+1); + } else if (root->key < *word) { + return queryDict(root->rchild, word); + } else { + return queryDict(root->lchild, word); + } + +} + int main(int argc, char *argv[]) { - char query[200]; - TrieNode *dict; - - struct timeval start, end; - gettimeofday(&start, NULL); - dict = CreateDict(); - gettimeofday(&end, NULL); - - printf("*****建立词典耗时 %.4f s.*****\n", 1.0 * (1000000 * (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec)) / 1000000); - printf("*****Input --quit for quiting.*****\n"); - do { - printf(">>>>>>>"); - scanf("%s", query); - if (TestAndTolower(query) == 0) { - printf("Invalid input"); - continue; - } - if(!strcmp("--quit", query)) - break; - QueryDict(dict, query); - } while (1); - system("cd && clear"); - return 0; + char query[200]; +#ifndef TERN + TrieNode *dict; + + struct timeval start, end; + gettimeofday(&start, NULL); + dict = CreateDict(); + gettimeofday(&end, NULL); + + printf("*****建立词典耗时 %.4f s.*****\n", 1.0 * (1000000 * (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec)) / 1000000); + printf("*****Input --quit for quiting.*****\n"); + do { + printf(">>>>>>>"); + scanf("%s", query); + if (TestAndTolower(query) == 0) { + printf("Invalid input"); + continue; + } + if(!strcmp("--quit", query)) + break; + QueryDict(dict, query); + } while (1); + system("cd && clear"); +#else + TernNode *dict; + + struct timeval start, end; + gettimeofday(&start, NULL); + dict = createDict(); + gettimeofday(&end, NULL); + + printf("******createDict successfully*****\n"); + printf("*****建立词典耗时 %.4f s.*****\n", 1.0 * (1000000 * (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec)) / 1000000); + printf("*****Input --quit for quiting.*****\n"); + do { + printf(">>>>>>>"); + scanf("%s", query); + if (TestAndTolower(query) == 0) { + printf("Invalid input"); + continue; + } + if(!strcmp("--quit", query)) + break; + TernNode *node = queryDict(dict, query); + if(node && node->data){ + printf("%s\n", node->data); + } else { + printf("\n"); + } + } while (1); + system("cd && clear"); +#endif + + return 0; } diff --git a/makefile b/makefile index 5ff67d3..a3c8c76 100644 --- a/makefile +++ b/makefile @@ -1,9 +1,6 @@ -MyDict : MyDict.o - gcc -o MyDict MyDict.o - -MyDict.o : MyDict.c - gcc -c MyDict.c +MyDict : + gcc MyDict.c -o MyDict.o -O0 -g clean: - rm MyDict.o + rm MyDict *~ -rf diff --git a/raw-dict_debug b/raw-dict_debug new file mode 100644 index 0000000..64801b9 --- /dev/null +++ b/raw-dict_debug @@ -0,0 +1,60 @@ +a +n.(A)As 或 A's 安(ampere);(a) art.一;n.字母A /[军] Analog.Digital,模拟/数字 /(=account of) 帐上 +aaal +American Academy of Arts and Letters 美国艺术和文学学会 +aachen + 亚琛[德意志联邦共和国西部城市] +aacs +Airways and Air Communications Service (美国)航路与航空通讯联络处 +aah + [军]Armored Artillery Howitzer,装甲榴弹炮;[军]Advanced Attack Helicopter,先进攻击直升机 +aal +ATM Adaptation Layer,ATM适应层 +aapamoor +n.[生]丘泽,高低位镶嵌沼泽 +aapss +American Academy of Political and Social Science 美国政治和社会科学研究院 +aar +n.阿勒河(位于瑞士中部,亦作Aare) +aardvark +n.[动]土豚 +aardwolf +n.[动]土狼 +aare + 阿勒河[瑞士中部] +aarnet +the Australian Academic and Research Network,澳大利亚科研网 +aaron +n.[圣经]亚伦(摩西之兄,犹太教的第一祭司长) +wally +adj.<苏格兰>好的,第一流的,大的,令人喜爱的 +wallydraigle +n.=wallydrag +walnut +n.胡桃,胡桃木 +walpurgisnacht + <德>n.=Walpurgis Night +walrus +n.[动]海象,海象胡须 +walsall + 沃尔索耳[英国英格兰中部城市] +walter +n.沃尔特(亦作Walt)(m.) +waltham + Forest 沃尔瑟姆福雷斯特[英国英格兰东南部城市](在大伦敦郡的东北部) +walty +adj.<古>(船)易倾侧的,易横倾的 +waltz +n.华尔兹舞,华尔兹舞曲adj.华尔兹舞的,华尔兹舞曲的vi.跳华尔兹舞,前进,旋转vt.迫使前进,与...跳华尔兹舞 +wampanoag +n.万帕诺亚格人 +wampee +n.[植]黄皮 +wampish +v.<苏格兰>(使)波动,(使)起伏,(使) 前后摇摆,使晃动 +zythepsary +n.酿造所 +zythum +n.古代啤酒 +zyzzyva +n.[动]象鼻虫 From 61062770f9fad0fc334bdaa552b77a4786d2b69e Mon Sep 17 00:00:00 2001 From: bg2bkk Date: Mon, 28 Mar 2016 18:39:39 +0800 Subject: [PATCH 2/4] add colorful cmd --- .MyDict.c.swp | Bin 0 -> 20480 bytes MyDict.c | 62 +++++++++++++++++++++++++++----------------------- 2 files changed, 34 insertions(+), 28 deletions(-) create mode 100644 .MyDict.c.swp diff --git a/.MyDict.c.swp b/.MyDict.c.swp new file mode 100644 index 0000000000000000000000000000000000000000..b95326e3074e0653aae6209b115acd147693edd0 GIT binary patch literal 20480 zcmeI23vg7`8OLwM*IKPsZMB_FJp>`UAy2?c4Mb7EGL4`Z95pb^vU@jK+3d!B00EIG zuY^}ni3rF;Ab>&)7LkwyvA)`|9otT)+G^`#+`AiE#ZhM*#hJGM?>z3kyPE`1I-Pdz z&ir=w-h0lu-}%mW&iT$c->lL*Cf&lWEgq%ebBU(C==$=G8kWA$F2UUEyr_@FN?e~NtrWOW`n8gMl5O*D{* z2Z}~tG;QRFGTudp2AR3c-nQYuYE^8L$W}0GEQ}7a+a?M8O1b!lh{+f)wZk`@lA^ z4zz$JU_7`A{PlcI`yF@Sg4e-|UT8`ue+0Bb-MxE1^y3;|bw6R?xdK?}GIaJ!Lq>tY2Z8opSn z>CxWCrM)L9+4)pz@ls>i!H;{Iky3M&6lktsIL;7@;;+=hY%Gh#qai)) zrW>Ae@h}+n>+{OB+Y)+|2dcQMqCtIf#ILi0Xe1IZWWHK&lojwSY=Jf>67*}N0%(Aq z8%>*y-76t!YTM!76%V5Rcr_(Xty*rZeaK!-p^0M3DO4tIkVYV>wor3L^>`v0=H)`V z`Y41Cxbq6=Qx%DOLoA%Ao25rtB*5zwW5F03nkeQ8rib$i`Q6Dp4OK7T3q@kOJ5Wy- zDBd+G9Mhw53deDax{;uT46o6nl*UEZBLDE=bX`)Sgcurg4UL)au;<~qtEFZ|QLKoS z&49Y|rb6^ldXlrI=y?!i<)2p_12ZUd2e zSaP1qfX2^@@|0%(Y<|S(X*AZZN_IA)Yos?lZ5(M$cCLi%_WCM0PcEnw(HGUdah)3} zLk%L)VzKpsxzvEwBTGwXNSo#V@o?g#$_cE1t8((R%F1%>CSEVzIY@9N z3M)|E-B`Bt*y26C`xmFz?MZg7IJS8^RB3f7WBCy@J_&^mo@-b3TELD{kE3bU^d4Pj zY}#U04drFjP7VuXL4*sP%vayQ`)*N%552)(&4;fUMHSN8MgJ>DXOUU#rssm?h%u#a z&T3&7H%DsXKE+^T$1>6>Z-19-fOO1FZDblk<{&bhS%yK$xKW&$ zYMo+zPFj)EF=y98riWrWbzoHqG2X)%al`gd&IA1ZX|y>?5oVGx-r(jLYxUD=_A}k? zGy)D_N_La$J1HA%OC$A7DQSe@?!j1A=2=iuB8LBCtw)WnT}I1enA^)z>z~6+o^C#z zdU}Pib4hybGMWmwhl2a?M##BQGlQ{qZ}M<+vTGS1phS~c!$co-C}hKjlMwb;?MM65}<<24VhW#uU&xh+s zPG3FL<&6g;VY-a4c&*OJ`=A@~sIF59d=6q&dNfQjP{2hlFDZ$6=jdEk z(oRlQG#WB_X{IC3-&Q?!2J-4&UoD&~xc9Z*7@G?39f!qmx{R#Kv_$4|I%+Rd7}n##&Xj8;V_Fey-3yc4jP9;bqa1jD{i z0%DGd2kZ3W+6vnp%-*5ktn7Q3*=w@z`GXOtSt{z%yVc zxEI_4t^${WB-ZWkfWLva!CT-Ba0qM%t>AvJ5LAQF;78yqtmSFFUjs_O5a0sGv2K3_ zw1IVCC8z;+f$P99a6UK-`~z$Gzk^?cHqZ=eK@GSWOavE$`>@6z4@QG40j>An0EfT| zFbhlqW5DI$pIFb+`hFK!4DJNuK|c5)I2U{fJ-rTo1zrUQ0oR?>oAcvnz|nxC0Y?Ll z2L3}DSjZhM_*!yL5c?V05IyV<2GoOCLoBY>x$}H}+Vk>-bT8lkK^FR`eY@$UBS%)3 z*3r%ozt7l0Cil^3KT6)CAM=gso>>Infl_xVrCO`n?$KYBL2eY1msM0jUSj5pf&n*o zz#z3P2|O+9qe|k^mz9q!D<73n2TE!Am%OyG>|S#Vmweblah>^U$~O!9JhL-u4uJ;4 zXY|$8yYq^Q?o9;a*iGZ5c|00rk)Iq5`6A(XFr1KHu}X$tCI6)G9B&A|axh$t^-yw84BV1;O@MkF;lXP8mRXveU0ctUn)(7_}HXT zPgL$vS-JfYOBj`w?>SGRYB8ZQY)D3-GZ1MIsuE*+{ z4J{spWq_Hnkd+meVjs<2R?0v07uH`KpEEP2`>?+pi+iJSdFf%p&8KjtGal2m$HPjP zEXY&t(n$OGWQbHCwl!({kHr!`pB{?^5}{CottJ}JW#!9Jmr8*BX>!^l0l&AwJq%(O zitRA7$;dvjRJLB}wiK0_nO8GAyIkMUJ78Wy6@?7%!5ATBv^od9s~kfIww~0(fk%$M z#s}+kq|dIE*`-dUJDFW6rwgf8>96aYa<>_v%cytcc7^{pQ&&x%X;uB3q^>;T)re9!>Kf-&H7K=A@^VxQ&>@LTXI zcp1C|o&v2P0qzF3gTdej;6h*^Cg5H0JU9gQfP27Q;C3(-TnkFUZx9pk5=ekJ2!fx1 zG2naPL&N~Q3l4$(U?Yfv5SR|G1m}Ts!P#IC_ycU~FxUyUfX#s08#%~ipfFjneNTM_ zhAXy+vC~HnbNa71lcPs8olr)0-+gQ_@4h&kliQK3!0N2lJxZ;H3!VXkg&HVwdXt(9 z_(cCTPT5IB`$0QhTeS&VC+@?9SR|0O(NC%ALwBu%_e@9MbX58~gC7zM&_F7-7)5zd z8}09^LwA1R$vl=<3F*};ozPas;k-&l`{~wyCbpxq59(C{+BByBXYRzFm#jqS>wJ|C zWX{yNJ-rN7pr1^ug3~4plRvYX**xE;Ec6-p$W0yEXP@#@PMTBXLBGum20Ze&oV&h~ z&GhTD5;a&ii1>h9W3jZFI`xnN5>=|GpW}B*!KZX?V7Vvo^gQPf-cq;SITrnwGFU2? z6F=RgkL=g>{>E3CMHOQD_K5krkjeF^#&hAzb`U65qghkAnUFH{{}$n2oZee1EQjF~ z-@=^+`I6x{9nY=|8IM(qoIy5w-EL`gwemk3}I5i`mq5m8Vi1|P~v7PXmUse$g|<=6lc=c>$%fw5v<)H8~EQU6(iG95m& zLJ?&0(3y%Ige~%j18Ncx!^b0sq&g`oDl^oCdts0y-!?KO()F}S`v--ASy#+C>rWh9!P?;+pKmZFbhD20>-HgTX#N>VFH?%yCX zb4McuXKrol?bt3}nrhq)-=cSAo0I`jtw;8yn$`*d%p-E4_S?)oNa5>p8D$Ii` z)~s2&dw06KJ=MH7)zX59F#H#FlJzi$%*;5RNCF;iUKLJ_dEYam1&d`2o{KLd`^qDS z=v?t+s27)xS~NEWHEZr&mCxQ>DiRoRzZ49dJhBWy4D!2kb!y8J(w~ZBpg3V3MQjE) zBoA-R3>YR8K%2nb6mfG&9?~cjg+Lp$51ytUUn!!1#r;%sBSJ`oBzU{A7U9J*GFklT zNYqGb^d|fMH<5V;l>h($ literal 0 HcmV?d00001 diff --git a/MyDict.c b/MyDict.c index e93494c..a1ba8f7 100644 --- a/MyDict.c +++ b/MyDict.c @@ -329,34 +329,40 @@ main(int argc, char *argv[]) { } while (1); system("cd && clear"); #else - TernNode *dict; - - struct timeval start, end; - gettimeofday(&start, NULL); - dict = createDict(); - gettimeofday(&end, NULL); - - printf("******createDict successfully*****\n"); - printf("*****建立词典耗时 %.4f s.*****\n", 1.0 * (1000000 * (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec)) / 1000000); - printf("*****Input --quit for quiting.*****\n"); - do { - printf(">>>>>>>"); - scanf("%s", query); - if (TestAndTolower(query) == 0) { - printf("Invalid input"); - continue; - } - if(!strcmp("--quit", query)) - break; - TernNode *node = queryDict(dict, query); - if(node && node->data){ - printf("%s\n", node->data); - } else { - printf("\n"); - } - } while (1); - system("cd && clear"); + TernNode *dict; + + struct timeval start, end; + gettimeofday(&start, NULL); + dict = createDict(); + gettimeofday(&end, NULL); + + printf("\033[1;34;40m"); + + printf("******createDict successfully*****\n"); + printf("*****建立词典耗时 %.4f s.*****\n", 1.0 * (1000000 * (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec)) / 1000000); + printf("*****Input --quit for quiting.*****\n"); + do { + printf("\033[1;34;40m"); + printf(">>>>>>>"); + printf("\033[1;32;40m"); + scanf("%s", query); + if (TestAndTolower(query) == 0) { + printf("Invalid input"); + continue; + } + if(!strcmp("--quit", query)) + break; + TernNode *node = queryDict(dict, query); + printf("\033[1;31;40m"); + if(node && node->data){ + printf("%s\n", node->data); + } else { + printf("\n"); + } + } while (1); + printf("\033[0m"); + system("cd && clear"); #endif - return 0; + return 0; } From eb50e0963d772a0845a78d70901cdc47024a8cbf Mon Sep 17 00:00:00 2001 From: bg2bkk Date: Tue, 29 Mar 2016 08:01:48 +0800 Subject: [PATCH 3/4] =?UTF-8?q?add=20=E4=B8=89=E5=90=91=E5=8D=95=E8=AF=8D?= =?UTF-8?q?=E6=9F=A5=E6=89=BE=E6=A0=91=20Readme?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .MyDict.c.swp | Bin 20480 -> 0 bytes README.md | 45 +++++++++++++++++++-------------------------- 2 files changed, 19 insertions(+), 26 deletions(-) delete mode 100644 .MyDict.c.swp diff --git a/.MyDict.c.swp b/.MyDict.c.swp deleted file mode 100644 index b95326e3074e0653aae6209b115acd147693edd0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20480 zcmeI23vg7`8OLwM*IKPsZMB_FJp>`UAy2?c4Mb7EGL4`Z95pb^vU@jK+3d!B00EIG zuY^}ni3rF;Ab>&)7LkwyvA)`|9otT)+G^`#+`AiE#ZhM*#hJGM?>z3kyPE`1I-Pdz z&ir=w-h0lu-}%mW&iT$c->lL*Cf&lWEgq%ebBU(C==$=G8kWA$F2UUEyr_@FN?e~NtrWOW`n8gMl5O*D{* z2Z}~tG;QRFGTudp2AR3c-nQYuYE^8L$W}0GEQ}7a+a?M8O1b!lh{+f)wZk`@lA^ z4zz$JU_7`A{PlcI`yF@Sg4e-|UT8`ue+0Bb-MxE1^y3;|bw6R?xdK?}GIaJ!Lq>tY2Z8opSn z>CxWCrM)L9+4)pz@ls>i!H;{Iky3M&6lktsIL;7@;;+=hY%Gh#qai)) zrW>Ae@h}+n>+{OB+Y)+|2dcQMqCtIf#ILi0Xe1IZWWHK&lojwSY=Jf>67*}N0%(Aq z8%>*y-76t!YTM!76%V5Rcr_(Xty*rZeaK!-p^0M3DO4tIkVYV>wor3L^>`v0=H)`V z`Y41Cxbq6=Qx%DOLoA%Ao25rtB*5zwW5F03nkeQ8rib$i`Q6Dp4OK7T3q@kOJ5Wy- zDBd+G9Mhw53deDax{;uT46o6nl*UEZBLDE=bX`)Sgcurg4UL)au;<~qtEFZ|QLKoS z&49Y|rb6^ldXlrI=y?!i<)2p_12ZUd2e zSaP1qfX2^@@|0%(Y<|S(X*AZZN_IA)Yos?lZ5(M$cCLi%_WCM0PcEnw(HGUdah)3} zLk%L)VzKpsxzvEwBTGwXNSo#V@o?g#$_cE1t8((R%F1%>CSEVzIY@9N z3M)|E-B`Bt*y26C`xmFz?MZg7IJS8^RB3f7WBCy@J_&^mo@-b3TELD{kE3bU^d4Pj zY}#U04drFjP7VuXL4*sP%vayQ`)*N%552)(&4;fUMHSN8MgJ>DXOUU#rssm?h%u#a z&T3&7H%DsXKE+^T$1>6>Z-19-fOO1FZDblk<{&bhS%yK$xKW&$ zYMo+zPFj)EF=y98riWrWbzoHqG2X)%al`gd&IA1ZX|y>?5oVGx-r(jLYxUD=_A}k? zGy)D_N_La$J1HA%OC$A7DQSe@?!j1A=2=iuB8LBCtw)WnT}I1enA^)z>z~6+o^C#z zdU}Pib4hybGMWmwhl2a?M##BQGlQ{qZ}M<+vTGS1phS~c!$co-C}hKjlMwb;?MM65}<<24VhW#uU&xh+s zPG3FL<&6g;VY-a4c&*OJ`=A@~sIF59d=6q&dNfQjP{2hlFDZ$6=jdEk z(oRlQG#WB_X{IC3-&Q?!2J-4&UoD&~xc9Z*7@G?39f!qmx{R#Kv_$4|I%+Rd7}n##&Xj8;V_Fey-3yc4jP9;bqa1jD{i z0%DGd2kZ3W+6vnp%-*5ktn7Q3*=w@z`GXOtSt{z%yVc zxEI_4t^${WB-ZWkfWLva!CT-Ba0qM%t>AvJ5LAQF;78yqtmSFFUjs_O5a0sGv2K3_ zw1IVCC8z;+f$P99a6UK-`~z$Gzk^?cHqZ=eK@GSWOavE$`>@6z4@QG40j>An0EfT| zFbhlqW5DI$pIFb+`hFK!4DJNuK|c5)I2U{fJ-rTo1zrUQ0oR?>oAcvnz|nxC0Y?Ll z2L3}DSjZhM_*!yL5c?V05IyV<2GoOCLoBY>x$}H}+Vk>-bT8lkK^FR`eY@$UBS%)3 z*3r%ozt7l0Cil^3KT6)CAM=gso>>Infl_xVrCO`n?$KYBL2eY1msM0jUSj5pf&n*o zz#z3P2|O+9qe|k^mz9q!D<73n2TE!Am%OyG>|S#Vmweblah>^U$~O!9JhL-u4uJ;4 zXY|$8yYq^Q?o9;a*iGZ5c|00rk)Iq5`6A(XFr1KHu}X$tCI6)G9B&A|axh$t^-yw84BV1;O@MkF;lXP8mRXveU0ctUn)(7_}HXT zPgL$vS-JfYOBj`w?>SGRYB8ZQY)D3-GZ1MIsuE*+{ z4J{spWq_Hnkd+meVjs<2R?0v07uH`KpEEP2`>?+pi+iJSdFf%p&8KjtGal2m$HPjP zEXY&t(n$OGWQbHCwl!({kHr!`pB{?^5}{CottJ}JW#!9Jmr8*BX>!^l0l&AwJq%(O zitRA7$;dvjRJLB}wiK0_nO8GAyIkMUJ78Wy6@?7%!5ATBv^od9s~kfIww~0(fk%$M z#s}+kq|dIE*`-dUJDFW6rwgf8>96aYa<>_v%cytcc7^{pQ&&x%X;uB3q^>;T)re9!>Kf-&H7K=A@^VxQ&>@LTXI zcp1C|o&v2P0qzF3gTdej;6h*^Cg5H0JU9gQfP27Q;C3(-TnkFUZx9pk5=ekJ2!fx1 zG2naPL&N~Q3l4$(U?Yfv5SR|G1m}Ts!P#IC_ycU~FxUyUfX#s08#%~ipfFjneNTM_ zhAXy+vC~HnbNa71lcPs8olr)0-+gQ_@4h&kliQK3!0N2lJxZ;H3!VXkg&HVwdXt(9 z_(cCTPT5IB`$0QhTeS&VC+@?9SR|0O(NC%ALwBu%_e@9MbX58~gC7zM&_F7-7)5zd z8}09^LwA1R$vl=<3F*};ozPas;k-&l`{~wyCbpxq59(C{+BByBXYRzFm#jqS>wJ|C zWX{yNJ-rN7pr1^ug3~4plRvYX**xE;Ec6-p$W0yEXP@#@PMTBXLBGum20Ze&oV&h~ z&GhTD5;a&ii1>h9W3jZFI`xnN5>=|GpW}B*!KZX?V7Vvo^gQPf-cq;SITrnwGFU2? z6F=RgkL=g>{>E3CMHOQD_K5krkjeF^#&hAzb`U65qghkAnUFH{{}$n2oZee1EQjF~ z-@=^+`I6x{9nY=|8IM(qoIy5w-EL`gwemk3}I5i`mq5m8Vi1|P~v7PXmUse$g|<=6lc=c>$%fw5v<)H8~EQU6(iG95m& zLJ?&0(3y%Ige~%j18Ncx!^b0sq&g`oDl^oCdts0y-!?KO()F}S`v--ASy#+C>rWh9!P?;+pKmZFbhD20>-HgTX#N>VFH?%yCX zb4McuXKrol?bt3}nrhq)-=cSAo0I`jtw;8yn$`*d%p-E4_S?)oNa5>p8D$Ii` z)~s2&dw06KJ=MH7)zX59F#H#FlJzi$%*;5RNCF;iUKLJ_dEYam1&d`2o{KLd`^qDS z=v?t+s27)xS~NEWHEZr&mCxQ>DiRoRzZ49dJhBWy4D!2kb!y8J(w~ZBpg3V3MQjE) zBoA-R3>YR8K%2nb6mfG&9?~cjg+Lp$51ytUUn!!1#r;%sBSJ`oBzU{A7U9J*GFklT zNYqGb^d|fMH<5V;l>h($ diff --git a/README.md b/README.md index b0f6f3d..adc20c0 100644 --- a/README.md +++ b/README.md @@ -1,34 +1,27 @@ MyDict --------------------------------------- -一个用C语言实现的英汉对照词典。 +* 一个用C语言实现的英汉对照词典。 -功能: -支持大小写混合输入。 -目前词库规模为45,093词。 -词库文件可以更换来生成新的词典。 -离线可用,简单实用。 -不用为了查个词再去动用google这种神器了,尤其看文档或者看源码时不知道某个词啥意思,只要之前用tmux给它分配一个很小的小窗口,随时可以切换过来查询。 +* 功能: + * 支持大小写混合输入。 + * 目前词库规模为45,093词。 + * 词库文件可以更换来生成新的词典。 + * 离线可用,简单实用。 + * 不用为了查个词再去动用google这种神器了,尤其看文档或者看源码时不知道某个词啥意思,只要之前用tmux给它分配一个很小的小窗口,随时可以切换过来查询。 -实现: -编码采用的是ASCII码集,数据结构采用Trie,内存占用73MB。以后想优化成三向单词查找树,以进一步减少内存使用。 -具体可以看看注释。 +* 实现: + * 编码采用的是ASCII码集 + * 数据结构采用Trie,内存占用73MB。 + * 我更新实现了三向单词查找树,以进一步减少内存使用,但是毫无疑问的,由于树的高度大大增加,恐怕在查询单词时会比较麻烦 -运行: -Linux环境下运行切换到源文件所在目录后运行make,然后执行./MyDict即可。 +* 运行: + * Linux环境下运行切换到源文件所在目录后运行make,然后执行./MyDict即可。 -各文件说明: -MyDict.c: 源码文件 -raw-dict: 用于生成词典的原始词库文件,编码格式为UTF8。 -makefile: makefile文件,切换到对应目录,直接运行make即可。 +* 各文件说明: + * MyDict.c: 源码文件 + * raw-dict: 用于生成词典的原始词库文件,编码格式为UTF8。 + * makefile: makefile文件,切换到对应目录,直接运行make即可。 -注意: -Windows下用于生成词典的原始词库文件需要是gbk编码,如果您是windows环境,要提前把raw-dict文件转码成为gbk格式,否则的话,虽然能成功建立词典,但是查询的时候看到的是乱码。 - -后续改进: -改进数据结构以进一步节约内存,同时尽量感觉不到延迟。 -输入提示,即当使用者不太记得单词如何拼时输入几个字母后按两下tab键弹出所有前缀相同的单词。 -错误提示,如果输入单词不存在,输出正确的最长前缀单词。 -加上音标。 -加上短语查询。 -最终目标是翻译! +* 注意: + * Windows下用于生成词典的原始词库文件需要是gbk编码,如果您是windows环境,要提前把raw-dict文件转码成为gbk格式,否则的话,虽然能成功建立词典,但是查询的时候看到的是乱码。 From 57d4aaea2e10d1927b0b7f3a01a704b510a29f23 Mon Sep 17 00:00:00 2001 From: bg2bkk Date: Tue, 29 Mar 2016 08:05:30 +0800 Subject: [PATCH 4/4] =?UTF-8?q?add=20=E4=B8=89=E5=90=91=E5=8D=95=E8=AF=8D?= =?UTF-8?q?=E6=9F=A5=E6=89=BE=E6=A0=91=20Readme?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index adc20c0..8e52439 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ MyDict * 功能: * 支持大小写混合输入。 - * 目前词库规模为45,093词。 + * 目前词库规模为103,976词。 * 词库文件可以更换来生成新的词典。 * 离线可用,简单实用。 * 不用为了查个词再去动用google这种神器了,尤其看文档或者看源码时不知道某个词啥意思,只要之前用tmux给它分配一个很小的小窗口,随时可以切换过来查询。 @@ -23,5 +23,10 @@ MyDict * raw-dict: 用于生成词典的原始词库文件,编码格式为UTF8。 * makefile: makefile文件,切换到对应目录,直接运行make即可。 +* TODO LIST + * 根据词库raw-dict,可以将常用单词的全部释义、音标和固定搭配从 有道词典 韦氏词典 等网站上由爬虫爬下来,python非常适合做 + * 词库文件每次载入时都要读文件,可以考虑要么压缩文件节省空间,要么采用数据库的方式,但是会导致问题负杂 + * 注意: * Windows下用于生成词典的原始词库文件需要是gbk编码,如果您是windows环境,要提前把raw-dict文件转码成为gbk格式,否则的话,虽然能成功建立词典,但是查询的时候看到的是乱码。 +