-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathIndexingFile3.hs
More file actions
127 lines (102 loc) · 3.69 KB
/
Copy pathIndexingFile3.hs
File metadata and controls
127 lines (102 loc) · 3.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
-- | Source code module for 12.31 - list comprehension.
--
-- Exports the type synonyms 'Doc', 'Line' and 'Word1', the individual
-- stages of the indexing pipeline (line numbering, word splitting, sorting,
-- grouping and filtering) and the complete pipeline 'makeIndex'.
module File3
(Doc, Line, Word1, numLines,numWords, whitespace, splitWords, dropSpace,split, getWord, dropWord, allNumWords,
sortLs, orderPair, makeLists, amalgamate, shorten, makeIndex)
where
-- | A whole input document, as passed to 'makeIndex'.
type Doc = String
-- | A single line of a document.
type Line = String
-- | A single word taken from a line.
type Word1 = String
-- Take the input document and split it line-wise at each \n.
--lines :: Doc -> [Line]
-- We use the default lines function from the Prelude, so it is not redefined here.
-- | Pair each line with its line number, counting from 1.
--
-- The numbers come from the infinite list @[1 ..]@, which 'zip' cuts off at
-- the end of the input.  The lines are therefore not traversed an extra time
-- just to compute their 'length', and the result is produced lazily.
numLines :: [Line] -> [(Int, Line)]
numLines = zip [1 ..]
-- | Split one numbered line into its words, tagging every word with the
-- number of the line it came from.
numWords :: (Int, Line) -> [(Int, Word1)]
numWords (lineNo, text) = map (\w -> (lineNo, w)) (splitWords text)
-- | Whitespace declaration: the characters treated as word separators by
-- 'dropSpace', 'getWord' and 'dropWord' (space, newline, tab and the listed
-- punctuation marks).
whitespace :: String
whitespace = " \n\t;:.,\'\"!?()-"
-- The following set of functions is used by numWords.
-- | Break a string into its words: leading separator characters are
-- discarded first, then the remainder is handed to 'split'.
splitWords :: String -> [Word1]
splitWords = split . dropSpace
-- | Peel words off the front of a string one at a time.
--
-- The first word is everything up to the first 'whitespace' character; the
-- separators that follow it are dropped before the rest is split the same
-- way.  If the string itself begins with a separator, the first word is
-- empty ('splitWords' strips leading separators before calling this).
split :: String -> [Word1]
split [] = []
split st = firstWord : split (dropSpace remainder)
  where
    (firstWord, remainder) = break (`elem` whitespace) st
-- | Drop the leading 'whitespace' characters from a string.
--
-- Scanning stops at the first character that is not a separator; that
-- character and everything after it are returned unchanged.
dropSpace :: String -> String
dropSpace = dropWhile (`elem` whitespace)
-- | Get the word at the front of a string: the longest prefix that contains
-- no 'whitespace' character.  The result is empty when the string is empty
-- or starts with a separator.
getWord :: String -> String
getWord = takeWhile (`notElem` whitespace)
-- | Drop the word at the front of a string: everything up to, but not
-- including, the first 'whitespace' character is removed.
dropWord :: String -> String
dropWord = dropWhile (`notElem` whitespace)
-- End of the functions used by numWords.
-- | Part b: apply 'numWords' to every numbered line and join the results
-- into a single list of (line number, word) pairs, in line order.
allNumWords :: [(Int, Line)] -> [(Int, Word1)]
allNumWords = concatMap numWords
-- | Sort (line number, word) pairs into the order given by 'orderPair'.
--
-- The head of the list is used as the pivot.  A pair equal to the pivot
-- falls into neither partition, because 'orderPair' is False for it in both
-- directions, so repeated pairs are dropped from the result.
sortLs :: [(Int, Word1)] -> [(Int, Word1)]
sortLs [] = []
sortLs (pivot : others) = sortLs before ++ (pivot : sortLs after)
  where
    before = filter (`orderPair` pivot) others
    after  = filter (orderPair pivot) others
-- | Strict "comes before" test on (line number, word) pairs: compare the
-- words first and fall back to the line numbers only when the words are
-- equal.  Two identical pairs are not ordered either way.
orderPair :: (Int, Word1) -> (Int, Word1) -> Bool
orderPair (n1, w1) (n2, w2) =
  case compare w1 w2 of
    LT -> True
    EQ -> n1 < n2
    GT -> False
-- | Wrap the line number of every pair in a singleton list, so that the
-- line numbers of equal words can later be joined together.
makeLists :: [(Int, Word1)] -> [([Int], Word1)]
makeLists pairs = [ ([n], w) | (n, w) <- pairs ]
-- | Combine the line-number lists of neighbouring entries that carry the
-- same word into one entry.
--
-- Only adjacent entries are merged, so equal words must already be next to
-- each other in the input.  The line numbers keep their input order.
amalgamate :: [([Int], Word1)] -> [([Int], Word1)]
amalgamate [] = []
amalgamate ((nums, w) : rest) =
  (nums ++ concatMap fst sameWord, w) : amalgamate others
  where
    -- The run of entries directly after this one that repeat its word.
    (sameWord, others) = span ((== w) . snd) rest
-- | Remove short words: keep only the index entries whose word is longer
-- than three characters.
--
-- Written as a list comprehension, according to 12.31.  The line-number list
-- of each entry is matched as a whole rather than as a one-element list, so
-- entries with several line numbers (or none) are filtered like any other
-- instead of failing the pattern match.
shorten :: [([Int], Word1)] -> [([Int], Word1)]
shorten entries = [ entry | entry@(_, w) <- entries, length w > 3 ]
-- | Build the index of a document by running all the stages in order:
-- split into lines, number the lines, split the lines into words, sort the
-- (line number, word) pairs, wrap the numbers in lists, merge the entries of
-- equal words and finally drop the short words.
makeIndex :: Doc -> [([Int], Word1)]
makeIndex = numberedWords >.> buildEntries
  where
    -- Doc -> [(Int, Word1)]
    numberedWords = lines >.> numLines >.> allNumWords
    -- [(Int, Word1)] -> [([Int], Word1)]
    buildEntries = sortLs >.> makeLists >.> amalgamate >.> shorten
-- | Forward function composition, as used in 'makeIndex': the function on
-- the left runs first and its result is passed to the function on the right,
-- i.e. @(g >.> f) x@ is @f (g x)@.
infixl 9 >.>
(>.>) :: (a -> b) -> (b -> c) -> (a -> c)
(g >.> f) x = f (g x)