diff --git a/imap/xapian_wrap.cpp b/imap/xapian_wrap.cpp index 9d28ee8ba0..c1c1fef03e 100644 --- a/imap/xapian_wrap.cpp +++ b/imap/xapian_wrap.cpp @@ -57,7 +57,7 @@ static Xapian::Stopper *get_stopper() struct buf buf = BUF_INITIALIZER; buf_setcstr(&buf, swpath); // XXX doesn't play nice with WIN32 paths - buf_appendcstr(&buf, "/english.list"); + buf_appendcstr(&buf, "/english.txt"); // Open the stopword file errno = 0; diff --git a/languages/stopwords/arabic.list b/languages/stopwords/arabic.txt similarity index 79% rename from languages/stopwords/arabic.list rename to languages/stopwords/arabic.txt index d3dc8df4e2..c3bd1dd9b2 100644 --- a/languages/stopwords/arabic.list +++ b/languages/stopwords/arabic.txt @@ -1,3 +1,9 @@ + + | An Arabic stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + | (This is not an official Snowball stop word list, basically inspired from + | Arabic Stop Words Project) + إذ إذا إذما @@ -25,11 +31,12 @@ إليكن أم أما +أما إما أن إن -أنا إنا +أنا أنت أنتم أنتما @@ -37,6 +44,7 @@ إنما إنه أنى +أنى آه آها أو @@ -45,17 +53,19 @@ أوه آي أي +أيها إي أين +أين أينما إيه -أيها بخ بس بعد بعض بك بكم +بكم بكما بكن بل @@ -70,8 +80,8 @@ بهما بهن بي -بيد بين +بيد | though تلك تلكم تلكما @@ -108,7 +118,7 @@ ذينك ريث سوف -سوى +سوى | except شتان عدا عسى @@ -119,7 +129,7 @@ عما عن عند -غير +غير | except فإذا فإن فلا @@ -144,13 +154,14 @@ كليكما كليهما كم +كم كما كي كيت كيف كيفما لا -لاسيما +لاسيما | especially لدى لست لستم @@ -188,15 +199,15 @@ ليسوا ما ماذا -متى +متى | when مذ مع مما ممن من -منذ منه منها +منذ مه مهما نحن @@ -229,10 +240,10 @@ هيا هيت هيهات -وإذ -وإذا والذي والذين +وإذ +وإذا وإن ولا ولكن diff --git a/languages/stopwords/danish.list b/languages/stopwords/danish.list deleted file mode 100644 index 9015c303b0..0000000000 --- a/languages/stopwords/danish.list +++ /dev/null @@ -1,94 +0,0 @@ -ad -af -alle -alt -anden -at -blev -blive -bliver -da -de -dem -den -denne -der -deres -det -dette -dig -din 
-disse -dog -du -efter -eller -en -end -er -et -for -fra -ham -han -hans -har -havde -have -hende -hendes -her -hos -hun -hvad -hvis -hvor -i -ikke -ind -jeg -jer -jo -kunne -man -mange -med -meget -men -mig -min -mine -mit -mod -når -ned -noget -nogle -nu -og -også -om -op -os -over -på -sådan -selv -sig -sin -sine -sit -skal -skulle -som -thi -til -ud -under -være -været -var -vi -vil -ville -vor diff --git a/languages/stopwords/danish.txt b/languages/stopwords/danish.txt new file mode 100644 index 0000000000..3705204264 --- /dev/null +++ b/languages/stopwords/danish.txt @@ -0,0 +1,102 @@ + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. 
+selv | myself/yourself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. +vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sådan | such, like this/like that diff --git a/languages/stopwords/dutch.list b/languages/stopwords/dutch.list deleted file mode 100644 index c316223589..0000000000 --- a/languages/stopwords/dutch.list +++ /dev/null @@ -1,101 +0,0 @@ -aan -al -alles -als -altijd -andere -ben -bij -daar -dan -dat -de -der -deze -die -dit -doch -doen -door -dus -een -eens -en -er -ge -geen -geweest -haar -had -heb -hebben -heeft -hem -het -hier -hij -hoe -hun -iemand -iets -ik -in -is -ja -je -kan -kon -kunnen -maar -me -meer -men -met -mij -mijn -moet -na -naar -niet -niets -nog -nu -of -om -omdat -onder -ons -ook -op -over -reeds -te -tegen -toch -toen -tot -u -uit -uw -van -veel -voor -want -waren -was -wat -werd -wezen -wie -wil -worden -wordt -zal -ze -zelf -zich -zij -zijn -zo -zonder -zou diff --git a/languages/stopwords/dutch.txt b/languages/stopwords/dutch.txt new file mode 100644 index 0000000000..d9f38a8e7e --- /dev/null +++ b/languages/stopwords/dutch.txt @@ -0,0 +1,113 @@ + + + | A 
Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. 
of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other + diff --git a/languages/stopwords/english.list b/languages/stopwords/english.list deleted file mode 100644 index 66424f80d7..0000000000 --- a/languages/stopwords/english.list +++ /dev/null @@ -1,174 +0,0 @@ -a -about -above -after -again -against -all -am -an -and -any -are -aren't -as -at -be -because -been -before -being -below -between -both -but -by -cannot -can't -could -couldn't -did -didn't -do -does -doesn't -doing -don't -down -during -each -few -for -from -further -had -hadn't -has -hasn't -have -haven't -having -he -he'd -he'll -her -here -here's -hers -herself -he's -him -himself -his -how -how's -i -i'd -if -i'll -i'm -in -into -is -isn't -it -its -it's -itself -i've -let's -me -more -most 
-mustn't -my -myself -no -nor -not -of -off -on -once -only -or -other -ought -our -ours -ourselves -out -over -own -same -shan't -she -she'd -she'll -she's -should -shouldn't -so -some -such -than -that -that's -the -their -theirs -them -themselves -then -there -there's -these -they -they'd -they'll -they're -they've -this -those -through -to -too -under -until -up -very -was -wasn't -we -we'd -we'll -were -we're -weren't -we've -what -what's -when -when's -where -where's -which -while -who -whom -who's -why -why's -with -won't -would -wouldn't -you -you'd -you'll -your -you're -yours -yourself -yourselves -you've diff --git a/languages/stopwords/english.txt b/languages/stopwords/english.txt new file mode 100644 index 0000000000..aee35c52df --- /dev/null +++ b/languages/stopwords/english.txt @@ -0,0 +1,312 @@ + + | An English stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | Many of the forms below are quite rare (e.g. "yourselves") but included for + | completeness. + + | PRONOUNS FORMS + | 1st person sing + +i | subject, always in upper case of course + +me | object +my | possessive adjective + | the possessive pronoun `mine' is best suppressed, because of the + | sense of coal-mine etc. +myself | reflexive + | 1st person plural +we | subject + +| us | object + | care is required here because US = United States. It is usually + | safe to remove it if it is in lower case. 
+our | possessive adjective +ours | possessive pronoun +ourselves | reflexive + | second person (archaic `thou' forms not included) +you | subject and object +your | possessive adjective +yours | possessive pronoun +yourself | reflexive (singular) +yourselves | reflexive (plural) + | third person singular +he | subject +him | object +his | possessive adjective and pronoun +himself | reflexive + +she | subject +her | object and possessive adjective +hers | possessive pronoun +herself | reflexive + +it | subject and object +its | possessive adjective +itself | reflexive + | third person plural +they | subject +them | object +their | possessive adjective +theirs | possessive pronoun +themselves | reflexive + | other forms (demonstratives, interrogatives) +what +which +who +whom +this +that +these +those + + | VERB FORMS (using F.R. Palmer's nomenclature) + | BE +am | 1st person, present +is | -s form (3rd person, present) +are | present +was | 1st person, past +were | past +be | infinitive +been | past participle +being | -ing form + | HAVE +have | simple +has | -s form +had | past +having | -ing form + | DO +do | simple +does | -s form +did | past +doing | -ing form + + | The forms below are, I believe, best omitted, because of the significant + | homonym forms: + + | He made a WILL + | old tin CAN + | merry month of MAY + | a smell of MUST + | fight the good fight with all thy MIGHT + + | would, could, should, ought might however be included + + | | AUXILIARIES + | | WILL + |will + +would + + | | SHALL + |shall + +should + + | | CAN + |can + +could + + | | MAY + |may + |might + | | MUST + |must + | | OUGHT + +ought + + | COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing + | pronoun + verb + +i'm +you're +he's +she's +it's +we're +they're +i've +you've +we've +they've +i'd +you'd +he'd +she'd +we'd +they'd +i'll +you'll +he'll +she'll +we'll +they'll + + | verb + negation + +isn't +aren't +wasn't +weren't +hasn't +haven't +hadn't +doesn't +don't 
+didn't + + | auxiliary + negation + +won't +wouldn't +shan't +shouldn't +can't +cannot +couldn't +mustn't + + | miscellaneous forms + +let's +that's +who's +what's +here's +there's +when's +where's +why's +how's + + | rarer forms + + | daren't needn't + + | doubtful forms + + | oughtn't mightn't + + | ARTICLES +a +an +the + + | THE REST (Overlap among prepositions, conjunctions, adverbs etc is so + | high, that classification is pointless.) +and +but +if +or +because +as +until +while + +of +at +by +for +with +about +against +between +into +through +during +before +after +above +below +to +from +up +down +in +out +on +off +over +under + +again +further +then +once + +here +there +when +where +why +how + +all +any +both +each +few +more +most +other +some +such + +no +nor +not +only +own +same +so +than +too +very + + | Just for the record, the following words are among the commonest in English + + | one + | every + | least + | less + | many + | now + | ever + | never + | say + | says + | said + | also + | get + | go + | goes + | just + | made + | make + | put + | see + | seen + | whether + | like + | well + | back + | even + | still + | way + | take + | since + | another + | however + | two + | three + | four + | five + | first + | second + | new + | old + | high + | long + diff --git a/languages/stopwords/finnish.list b/languages/stopwords/finnish.list deleted file mode 100644 index 7909e8d5c3..0000000000 --- a/languages/stopwords/finnish.list +++ /dev/null @@ -1,67 +0,0 @@ -ei -eivät -emme -en -et -että -ette -hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle -he heidän heidät heitä heissä heistä heihin heillä heiltä heille -itse -ja -joka jonka jota jossa josta johon jolla jolta jolle jona joksi -jos -jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi -kanssa -ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi -koska -kuin -kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle 
kenenä keneksi -kun -me meidän meidät meitä meissä meistä meihin meillä meiltä meille -mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi -minä minun minut minua minussa minusta minuun minulla minulta minulle -mitkä -mukaan -mutta -nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi -ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi -niin -noin -nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi -nyt -ole -olemme -olen -olet -olette -oli -olimme -olin -olisi -olisimme -olisin -olisit -olisitte -olisivat -olit -olitte -olivat -olla -olleet -ollut -on -ovat -poikki -sekä -se sen sitä siinä siitä siihen sillä siltä sille sinä siksi -sillä -sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle -tai -tämä tämän tätä tässä tästä tähän tällä tältä tälle tänä täksi -te teidän teidät teitä teissä teistä teihin teillä teiltä teille -tuo tuon tuota tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi -vaan -vai -vaikka -yli diff --git a/languages/stopwords/finnish.txt b/languages/stopwords/finnish.txt new file mode 100644 index 0000000000..2be66c00eb --- /dev/null +++ b/languages/stopwords/finnish.txt @@ -0,0 +1,88 @@ + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä 
tässä tästä tähän tällä tältä tälle tänä täksi | this +tuo tuon tuota tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +nyt | now +itse | self + diff --git a/languages/stopwords/french.list b/languages/stopwords/french.list deleted file mode 100644 index 454a07d5de..0000000000 --- a/languages/stopwords/french.list +++ /dev/null @@ -1,164 +0,0 @@ -à -ai -aie -aient -aies -ait -as -au -aura -aurai -auraient -aurais -aurait -auras -aurez -auriez -aurions -aurons -auront -aux -avaient -avais -avait -avec -avez -aviez -avions -avons -ayant -ayez -ayons -c -ce -ceci -cela -celà -ces -cet -cette -d -dans -de -des -du -elle -en -es -est -et -étaient -étais -était -étant -été -étée -étées -êtes -étés -étiez -étions -eu -eue -eues -eûmes -eurent -eus -eusse -eussent -eusses -eussiez -eussions -eut -eût -eûtes -eux -fûmes -furent -fus -fusse -fussent -fusses -fussiez -fussions -fut -fût -fûtes -ici -il -ils -j -je -l 
-la -le -les -leur -leurs -lui -m -ma -mais -me -même -mes -moi -mon -n -ne -nos -notre -nous -on -ont -ou -par -pas -pour -qu -que -quel -quelle -quelles -quels -qui -s -sa -sans -se -sera -serai -seraient -serais -serait -seras -serez -seriez -serions -serons -seront -ses -soi -soient -sois -soit -sommes -son -sont -soyez -soyons -suis -sur -t -ta -te -tes -toi -ton -tu -un -une -vos -votre -vous -y diff --git a/languages/stopwords/french.txt b/languages/stopwords/french.txt new file mode 100644 index 0000000000..5c6b4f8f4a --- /dev/null +++ b/languages/stopwords/french.txt @@ -0,0 +1,178 @@ + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus 
+ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +cela | that (added 11 Apr 2012. Omission reported by Adrien Grand) +celà | that (incorrect, though common) +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/languages/stopwords/german.list b/languages/stopwords/german.list deleted file mode 100644 index 25fa7c2a58..0000000000 --- a/languages/stopwords/german.list +++ /dev/null @@ -1,231 +0,0 @@ -aber -alle -allem -allen -aller -alles -als -also -am -an -ander -andere -anderem -anderen -anderer -anderes -anderm -andern -anderr -anders -auch -auf -aus -bei -bin -bis -bist -da -damit -dann -das -daß -dasselbe -dazu -dein -deine -deinem -deinen -deiner -deines -dem -demselben -den -denn -denselben -der -derer -derselbe -derselben -des -desselben -dessen -dich -die -dies -diese -dieselbe -dieselben -diesem -diesen -dieser -dieses -dir -doch -dort -du -durch -ein -eine -einem -einen -einer -eines -einig -einige -einigem -einigen -einiger -einiges -einmal -er -es -etwas -euch -euer -eure -eurem -euren -eurer -eures -für -gegen -gewesen -hab -habe -haben -hat -hatte -hatten -hier -hin -hinter -ich -ihm -ihn -ihnen -ihr -ihre -ihrem -ihren -ihrer -ihres -im -in -indem -ins -ist -jede -jedem -jeden -jeder -jedes -jene -jenem -jenen -jener -jenes -jetzt -kann -kein -keine -keinem -keinen -keiner -keines -können -könnte -machen -man -manche -manchem -manchen -mancher -manches -mein -meine -meinem -meinen -meiner -meines -mich -mir -mit -muss -musste -nach -nicht -nichts -noch -nun -nur -ob -oder -ohne -sehr -sein -seine -seinem -seinen -seiner -seines -selbst 
-sich -sie -sind -so -solche -solchem -solchen -solcher -solches -soll -sollte -sondern -sonst -über -um -und -uns -unse -unsem -unsen -unser -unses -unter -viel -vom -von -vor -während -war -waren -warst -was -weg -weil -weiter -welche -welchem -welchen -welcher -welches -wenn -werde -werden -wie -wieder -will -wir -wird -wirst -wo -wollen -wollte -würde -würden -zu -zum -zur -zwar -zwischen diff --git a/languages/stopwords/german.txt b/languages/stopwords/german.txt new file mode 100644 index 0000000000..5c45a517c4 --- /dev/null +++ b/languages/stopwords/german.txt @@ -0,0 +1,286 @@ + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. + + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/languages/stopwords/hungarian.list b/languages/stopwords/hungarian.txt similarity index 92% rename from languages/stopwords/hungarian.list rename to languages/stopwords/hungarian.txt index 95fa3fde1b..2599a8d1b9 100644 --- a/languages/stopwords/hungarian.list +++ 
b/languages/stopwords/hungarian.txt @@ -1,6 +1,8 @@ + +| Hungarian stop word list +| prepared by Anna Tordai + a -abban -ahhoz ahogy ahol aki @@ -16,23 +18,25 @@ amelyeket amelyet amelynek ami -amíg -amikor amit amolyan +amíg +amikor +át +abban +ahhoz annak arra arról -át az -azért azok azon -azonban azt +azzal +azért aztán azután -azzal +azonban bár be belül @@ -43,36 +47,36 @@ cikkeket csak de e -ebben eddig egész egy -egyéb egyes egyetlen +egyéb egyik egyre -ehhez ekkor el elég ellen -elõ -elõször -elõtt -elsõ -emilyen +elő +először +előtt +első én -ennek éppen +ebben +ehhez +emilyen +ennek erre -és ez +ezt ezek ezen -ezért -ezt ezzel +ezért +és fel felé hanem @@ -81,29 +85,29 @@ hogy hogyan igen így -ill -ill. illetve +ill. +ill ilyen ilyenkor -ismét ison +ismét itt jó -jobban jól +jobban kell kellett -keressünk keresztül +keressünk ki kívül között közül legalább -legyen lehet lehetett +legyen lenne lenni lesz @@ -111,27 +115,28 @@ lett maga magát majd +majd már más másik meg még mellett +mert mely melyek -mert mi -miért +mit míg -mikor +miért milyen +mikor minden -mindenki mindent +mindenki mindig mint mintha -mit mivel most nagy @@ -139,18 +144,18 @@ nagyobb nagyon ne néha -néhány nekem neki -nélkül nem +néhány +nélkül nincs -õ -õk -õket olyan -össze ott +össze +ő +ők +őket pedig persze rá @@ -168,9 +173,9 @@ szinte talán tehát teljes -több tovább továbbá +több úgy ugyanis új @@ -181,18 +186,18 @@ utána utolsó vagy vagyis -vagyok valaki valami valamint való +vagyok van vannak -vele -vissza -viszont -volna volt -voltak voltam +voltak voltunk +vissza +vele +viszont +volna diff --git a/languages/stopwords/indonesian.txt b/languages/stopwords/indonesian.txt new file mode 100644 index 0000000000..c433b01b2a --- /dev/null +++ b/languages/stopwords/indonesian.txt @@ -0,0 +1,91 @@ +yang | that +dan | and +di | in +dari | from +ini | this +pada kepada | at, to [person] +ada adalah | there is, is +dengan | with +untuk | for +dalam | in the +oleh | by +sebagai | as 
+juga | also, too +ke | to +atau | or +tidak | not +itu | that +sebuah | a +tersebut | the +dapat | can, may +ia | he/she, yes +telah | already +satu | one +memiliki | have +mereka | they +bahwa | that +lebih | more, more than +karena | because, since +seorang | one person, same +akan | will, about to +seperti | as, like +secara | on +kemudian | later, then +beberapa | some +banyak | many +antara | between +setelah | after +yaitu | that is +hanya | only +hingga | to +serta | along with +sama | same, and +dia | he/she/it (informal) +tetapi | but +namun | however +melalui | through +bisa | can +sehingga | so +ketika | when +suatu | a +sendiri | own (adverb) +bagi | for +semua | all +harus | must +setiap | each, every +maka | then +maupun | as well +tanpa | without +saja | only +jika | if +bukan | not +belum | not yet +sedangkan | while +yakni | i.e. +meskipun | although +hampir | almost +kita | we/us (inclusive) +demikian | thereby +daripada | from/than/instead of +apa | what/which/or/eh +ialah | is +sana | there +begitu | so +seseorang | someone +selain | besides +terlalu | too +ataupun | or +saya | me/I (formal) +bila | if/when +bagaimana | how +tapi | but +apabila | when/if +kalau | if +kami | we/us (exclusive) +melainkan | but (rather) +boleh | may,can +aku | I/me (informal) +anda | you (formal) +kamu | you (informal) +beliau | he/she/it (formal) +kalian | you (plural) diff --git a/languages/stopwords/italian.list b/languages/stopwords/italian.list deleted file mode 100644 index fac1d8b18a..0000000000 --- a/languages/stopwords/italian.list +++ /dev/null @@ -1,279 +0,0 @@ -a -abbia -abbiamo -abbiano -abbiate -ad -agl -agli -ai -al -all -alla -alle -allo -anche -avemmo -avendo -avesse -avessero -avessi -avessimo -aveste -avesti -avete -aveva -avevamo -avevano -avevate -avevi -avevo -avrà -avrai -avranno -avrebbe -avrebbero -avrei -avremmo -avremo -avreste -avresti -avrete -avrò -avuta -avute -avuti -avuto -c -che -chi -ci -coi -col -come -con -contro -cui -da -dagl 
-dagli -dai -dal -dall -dalla -dalle -dallo -degl -degli -dei -del -dell -della -delle -dello -di -dov -dove -e -è -ebbe -ebbero -ebbi -ed -era -erano -eravamo -eravate -eri -ero -essendo -faccia -facciamo -facciano -facciate -faccio -facemmo -facendo -facesse -facessero -facessi -facessimo -faceste -facesti -faceva -facevamo -facevano -facevate -facevi -facevo -fai -fanno -farà -farai -faranno -farebbe -farebbero -farei -faremmo -faremo -fareste -faresti -farete -farò -fece -fecero -feci -fosse -fossero -fossi -fossimo -foste -fosti -fu -fui -fummo -furono -gli -ha -hai -hanno -ho -i -il -in -io -l -la -le -lei -li -lo -loro -lui -ma -mi -mia -mie -miei -mio -ne -negl -negli -nei -nel -nell -nella -nelle -nello -noi -non -nostra -nostre -nostri -nostro -o -per -perché -più -quale -quanta -quante -quanti -quanto -quella -quelle -quelli -quello -questa -queste -questi -questo -sarà -sarai -saranno -sarebbe -sarebbero -sarei -saremmo -saremo -sareste -saresti -sarete -sarò -se -sei -si -sia -siamo -siano -siate -siete -sono -sta -stai -stando -stanno -starà -starai -staranno -starebbe -starebbero -starei -staremmo -staremo -stareste -staresti -starete -starò -stava -stavamo -stavano -stavate -stavi -stavo -stemmo -stesse -stessero -stessi -stessimo -steste -stesti -stette -stettero -stetti -stia -stiamo -stiano -stiate -sto -su -sua -sue -sugl -sugli -sui -sul -sull -sulla -sulle -sullo -suo -suoi -ti -tra -tu -tua -tue -tuo -tuoi -tutti -tutto -un -una -uno -vi -voi -vostra -vostre -vostri -vostro diff --git a/languages/stopwords/italian.txt b/languages/stopwords/italian.txt new file mode 100644 index 0000000000..a20bb9528a --- /dev/null +++ b/languages/stopwords/italian.txt @@ -0,0 +1,295 @@ + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gl' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + il +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo
+avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/languages/stopwords/norwegian.list b/languages/stopwords/norwegian.list deleted file mode 100644 index b8c1735c39..0000000000 --- a/languages/stopwords/norwegian.list +++ /dev/null @@ -1,172 +0,0 @@ -å -alle -at -av -både -båe -bare -begge -ble -blei -bli -blir -blitt -da -då -de -deg -dei -deim -deira -deires -dem -den -denne -der -dere -deres -det -dette -di -din -disse -ditt -du -dykk -dykkar -eg -ein -eit -eitt -eller -elles -en -enn -er -et -ett -etter -for -før -fordi -fra -ha -hadde -han -hans -har -hennar -henne -hennes -her -hjå -ho -hoe -honom -hoss -hossen -hun -hva -hvem -hver -hvilke -hvilken -hvis -hvor -hvordan -hvorfor -i -ikke -ikkje -ingen -ingi -inkje -inn -inni -ja -jeg -kan -kom -korleis -korso -kun -kunne -kva -kvar -kvarhelst 
-kven -kvi -kvifor -man -mange -me -med -medan -meg -meget -mellom -men -mi -min -mine -mitt -mot -mykje -nå -når -ned -no -noe -noen -noka -noko -nokon -nokor -nokre -og -også -om -opp -oss -over -på -så -samme -sånn -seg -selv -si -sia -sidan -siden -sin -sine -sitt -sjøl -skal -skulle -slik -so -som -somme -somt -til -um -upp -ut -uten -være -vært -var -vår -vart -varte -ved -vere -verte -vi -vil -ville -vore -vors -vort diff --git a/languages/stopwords/norwegian.txt b/languages/stopwords/norwegian.txt new file mode 100644 index 0000000000..df1c509561 --- /dev/null +++ b/languages/stopwords/norwegian.txt @@ -0,0 +1,182 @@ + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) +en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/which/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | 
against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) * +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) 
* +nokor | some * +noko | some * +nokre | some * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/languages/stopwords/portuguese.list b/languages/stopwords/portuguese.list deleted file mode 100644 index c59439b198..0000000000 --- a/languages/stopwords/portuguese.list +++ /dev/null @@ -1,203 +0,0 @@ -a -à -ao -aos -aquela -aquelas -aquele -aqueles -aquilo -as -às -até -com -como -da -das -de -dela -delas -dele -deles -depois -do -dos -e -ela -elas -ele -eles -em -entre -era -eram -éramos -essa -essas -esse -esses -esta -está -estamos -estão -estas -estava -estavam -estávamos -este -esteja -estejam -estejamos -estes -esteve -estive -estivemos -estiver -estivera -estiveram -estivéramos -estiverem -estivermos -estivesse -estivessem -estivéssemos -estou -eu -foi -fomos -for -fora -foram -fôramos -forem -formos -fosse -fossem -fôssemos -fui -há -haja -hajam -hajamos -hão -havemos -hei -houve -houvemos -houver -houvera -houverá -houveram -houvéramos -houverão -houverei -houverem -houveremos -houveria -houveriam -houveríamos -houvermos -houvesse -houvessem -houvéssemos -isso -isto -já -lhe -lhes -mais -mas -me -mesmo -meu -meus -minha -minhas -muito -na -não -nas -nem -no -nos -nós -nossa -nossas -nosso -nossos -num -numa -o -os -ou -para -pela -pelas -pelo -pelos -por -qual -quando -que -quem -são -se -seja -sejam -sejamos -sem -será -serão -serei -seremos -seria -seriam -seríamos -seu -seus -só -somos -sou -sua -suas -também -te -tem -tém -temos -tenha -tenham -tenhamos -tenho -terá -terão -terei -teremos -teria -teriam -teríamos -teu -teus -teve -tinha -tinham -tínhamos -tive -tivemos -tiver -tivera -tiveram -tivéramos -tiverem -tivermos -tivesse -tivessem -tivéssemos -tu -tua -tuas -um -uma -você -vocês -vos diff --git a/languages/stopwords/portuguese.txt b/languages/stopwords/portuguese.txt new file mode 
100644 index 0000000000..9c3c9ac76d --- /dev/null +++ b/languages/stopwords/portuguese.txt @@ -0,0 +1,245 @@ + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. + +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | she +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare.
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/languages/stopwords/russian.list b/languages/stopwords/russian.list deleted file mode 100644 index b08e0ee927..0000000000 --- a/languages/stopwords/russian.list +++ /dev/null @@ -1,159 +0,0 @@ -а -без -более -больше -будет -будто -бы -был -была -были -было -быть -в -вам -вас -вдруг -ведь -во -вот -впрочем -все -всегда -всего -всех -всю -вы -где -говорил -да -даже -два -для -до -другой -его -ее -ей -ему -если -есть -еще -ж -же -жизнь -за -зачем -здесь -и -из -или -им -иногда -их -к -кажется -как -какая -какой -когда -конечно -кто -куда 
-ли -лучше -между -меня -мне -много -может -можно -мой -моя -мы -на -над -надо -наконец -нас -не -него -нее -ней -нельзя -нет -ни -нибудь -никогда -ним -них -ничего -но -ну -о -об -один -он -она -они -опять -от -перед -по -под -после -потом -потому -почти -при -про -раз -разве -с -сам -свою -себе -себя -сегодня -сейчас -сказал -сказала -сказать -со -совсем -так -такой -там -тебя -тем -теперь -то -тогда -того -тоже -только -том -тот -три -тут -ты -у -уж -уже -хорошо -хоть -чего -человек -чем -через -что -чтоб -чтобы -чуть -эти -этого -этой -этом -этот -эту -я diff --git a/languages/stopwords/russian.txt b/languages/stopwords/russian.txt new file mode 100644 index 0000000000..54fcc3d6d3 --- /dev/null +++ b/languages/stopwords/russian.txt @@ -0,0 +1,236 @@ + + + | A Russian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | I +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isn't/aren't +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef.
suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. 
form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее ей ею [нее, ней, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эту это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/languages/stopwords/spanish.list b/languages/stopwords/spanish.list deleted file mode 100644 index f605a8b052..0000000000 --- a/languages/stopwords/spanish.list +++ /dev/null @@ -1,308 +0,0 @@ -a -al -algo -algunas -algunos -ante -antes -como -con -contra -cual -cuando -de -del -desde -donde -durante -e -el -él -ella -ellas -ellos -en -entre -era -erais -éramos -eran -eras -eres -es -esa -esas -ese -eso -esos -esta -está -estaba -estabais -estábamos -estaban -estabas -estad -estada -estadas -estado -estados -estáis -estamos
-están -estando -estar -estará -estarán -estarás -estaré -estaréis -estaremos -estaría -estaríais -estaríamos -estarían -estarías -estas -estás -este -esté -estéis -estemos -estén -estés -esto -estos -estoy -estuve -estuviera -estuvierais -estuviéramos -estuvieran -estuvieras -estuvieron -estuviese -estuvieseis -estuviésemos -estuviesen -estuvieses -estuvimos -estuviste -estuvisteis -estuvo -fue -fuera -fuerais -fuéramos -fueran -fueras -fueron -fuese -fueseis -fuésemos -fuesen -fueses -fui -fuimos -fuiste -fuisteis -ha -habéis -había -habíais -habíamos -habían -habías -habida -habidas -habido -habidos -habiendo -habrá -habrán -habrás -habré -habréis -habremos -habría -habríais -habríamos -habrían -habrías -han -has -hasta -hay -haya -hayáis -hayamos -hayan -hayas -he -hemos -hube -hubiera -hubierais -hubiéramos -hubieran -hubieras -hubieron -hubiese -hubieseis -hubiésemos -hubiesen -hubieses -hubimos -hubiste -hubisteis -hubo -la -las -le -les -lo -los -más -me -mi -mí -mía -mías -mío -míos -mis -mucho -muchos -muy -nada -ni -no -nos -nosotras -nosotros -nuestra -nuestras -nuestro -nuestros -o -os -otra -otras -otro -otros -para -pero -poco -por -porque -que -qué -quien -quienes -se -sea -seáis -seamos -sean -seas -será -serán -serás -seré -seréis -seremos -sería -seríais -seríamos -serían -serías -sí -sido -siendo -sin -sobre -sois -somos -son -soy -su -sus -suya -suyas -suyo -suyos -también -tanto -te -tendrá -tendrán -tendrás -tendré -tendréis -tendremos -tendría -tendríais -tendríamos -tendrían -tendrías -tened -tenéis -tenemos -tenga -tengáis -tengamos -tengan -tengas -tengo -tenía -teníais -teníamos -tenían -tenías -tenida -tenidas -tenido -tenidos -teniendo -ti -tiene -tienen -tienes -todo -todos -tu -tú -tus -tuve -tuviera -tuvierais -tuviéramos -tuvieran -tuvieras -tuvieron -tuviese -tuvieseis -tuviésemos -tuviesen -tuvieses -tuvimos -tuviste -tuvisteis -tuvo -tuya -tuyas -tuyo -tuyos -un -una -uno -unos -vosotras -vosotros -vuestra -vuestras -vuestro 
-vuestros -y -ya -yo diff --git a/languages/stopwords/spanish.txt b/languages/stopwords/spanish.txt new file mode 100644 index 0000000000..fd323a458b --- /dev/null +++ b/languages/stopwords/spanish.txt @@ -0,0 +1,348 @@ + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. + +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | but +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what?
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/languages/stopwords/swedish.list b/languages/stopwords/swedish.list deleted file mode 100644 index 569049345b..0000000000 --- a/languages/stopwords/swedish.list +++ /dev/null @@ -1,114 +0,0 @@ -alla -allt -än -är -åt -att -av -blev -bli -blir -blivit -då -där -de -dem -den -denna -deras -dess -dessa -det -detta -dig -din -dina -ditt -du -efter -ej -eller -en -er -era -ert -ett -för -från -ha -hade -han -hans -har -här -henne -hennes -hon -honom -hur -i -icke -ingen -inom -inte -jag -ju -kan -kunde -man -med -mellan -men -mig -min -mina -mitt -mot -mycket -någon -något -några -när -ni -nu -och -om -oss -över -på -så -sådan -sådana -sådant -samma -sedan -sig -sin -sina -sitta -själv -skulle -som -till -under -upp -ut -utan -vad -var -vår -vara -våra -varför -varit -varje -vars -vart -vårt -vem -vi -vid -vilka -vilkas -vilken -vilket diff --git a/languages/stopwords/swedish.txt b/languages/stopwords/swedish.txt new file mode 100644 index 0000000000..cac0b0c02d --- /dev/null +++ b/languages/stopwords/swedish.txt @@ -0,0 +1,125 @@ + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our +blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | whose +vårt | our +våra | our +ert | your +era | your +vilkas | whose + diff --git a/lib/imapoptions b/lib/imapoptions index cc623ee0d8..1b0a3e5c62 100644 
--- a/lib/imapoptions +++ b/lib/imapoptions @@ -2042,7 +2042,7 @@ If all partitions are over that limit, this feature is not used anymore. { "search_stopword_path", NULL, STRING } /* The absolute base path to the search stopword lists. If not specified, no stopwords will be taken into account during search indexing. Currently, - the only supported and default stop word file is english.list. */ + the only supported and default stop word file is english.txt. */ # Commented out - there's no such thing as "searchpartition-name", # but we need this for the man page