PR-HARIHARAN committed on
Commit
13b1df6
·
verified ·
1 Parent(s): 1918e3a

Upload 6 files

Browse files
MultiOutput_NB.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b500848f0069fdcdffff29884f97a0617b6d0b2cf7b41be0710a9353bb9078c9
3
+ size 4163656
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import re
4
+ from scipy.sparse import hstack
5
+ from sklearn.feature_extraction.text import TfidfVectorizer
6
+ from sklearn.preprocessing import LabelEncoder
7
+ from nltk.tokenize import word_tokenize
8
+ from nltk.corpus import stopwords
9
+ from nltk.stem import PorterStemmer, WordNetLemmatizer
10
+ import nltk
11
+ import numpy as np
12
+
13
# Ensure required NLTK data is downloaded.
# NLTK 3.9 and later make word_tokenize() look up the 'punkt_tab' resource
# rather than the older pickled 'punkt' models, so both are requested; a
# release that does not know one of the names only reports an error for it.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(_resource)


def _load_pickle(path):
    """Return the object deserialized from the pickle file at ``path``.

    SECURITY: ``pickle.load`` can execute arbitrary code contained in the
    file, so this must only be pointed at artifacts shipped with the app,
    never at user-supplied uploads.
    """
    with open(path, "rb") as file:
        return pickle.load(file)


# Load pre-trained models and encoders.
# NOTE(review): Streamlit re-executes this script on every interaction, so
# these files are re-read on each rerun; consider caching the loads.
nb_model = _load_pickle("MultiOutput_NB.pkl")
rf_model = _load_pickle("MultiOutput_RF.pkl")
vectorizer = _load_pickle("vectorizer.pkl")
label_encoder_category = _load_pickle("category_encoder.pkl")
label_encoder_subcategory = _load_pickle("subcategory_encoder.pkl")
33
+
34
+ # Load custom stopwords
35
def load_custom_stopwords(file_path):
    """Read a one-word-per-line stopword file into a set.

    Each line is stripped of surrounding whitespace and lines that are empty
    after stripping are skipped. The file is read as UTF-8.

    Args:
        file_path: Path of the stopword file to read.

    Returns:
        A set of the non-empty stripped lines. If the file does not exist, an
        error is shown through ``st.error`` and an empty set is returned.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            # Strip first, then drop blanks, so whitespace-only lines vanish.
            return {entry for entry in map(str.strip, handle) if entry}
    except FileNotFoundError:
        st.error(f"Stopwords file '{file_path}' not found.")
        return set()
43
+
44
+ # Function to preprocess input text
45
def preprocess_text(text):
    """Normalize a raw description into a space-joined string of word stems.

    Steps, in order: delete every character that is not an ASCII letter or
    whitespace, lowercase the result, tokenize it with ``word_tokenize``,
    drop tokens found in the combined stopword set, then Porter-stem and
    WordNet-lemmatize each remaining token.

    The stopword set is the union of NLTK's 'english' and 'hinglish' lists
    and the entries of the local ``stop_hinglish.txt`` file; it is rebuilt on
    every call.

    Args:
        text: The raw input string.

    Returns:
        The processed tokens joined by single spaces (empty string if no
        token survives).
    """
    # Build the stopword set: custom file first, then the two NLTK lists.
    blocked = set()
    custom_entries = load_custom_stopwords('stop_hinglish.txt')
    blocked.update(stopwords.words('english'))
    blocked.update(stopwords.words('hinglish'))
    blocked.update(custom_entries)

    # Remove special characters before lowercasing, then tokenize.
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = word_tokenize(letters_only.lower())
    kept = [token for token in tokens if token not in blocked]

    # Stem each surviving token, then lemmatize the stem.
    stemmer = PorterStemmer()
    lemmatizer = WordNetLemmatizer()
    normalized = []
    for token in kept:
        stem = stemmer.stem(token)
        normalized.append(lemmatizer.lemmatize(stem))

    return ' '.join(normalized)
67
+
68
# Streamlit app: page header.
st.title("Crime Category & Subcategory Classification")
st.write("Enter the crime description below to classify its category and sub-category.")

# Model selection dropdown.
model_choice = st.selectbox(
    "Choose the classification model:",
    ["Naive Bayes", "Random Forest"],
)

# Free-text input area for the description to classify.
user_input = st.text_area("Crime Description:")

if st.button("Classify"):
    if not user_input.strip():
        # Blank or whitespace-only input: nothing to classify.
        st.error("Please enter a valid crime description.")
    else:
        # Preprocess the input and echo the cleaned form back to the user.
        cleaned_text = preprocess_text(user_input)
        st.write("Cleaned text:", cleaned_text)

        # Vectorize the cleaned text with the loaded vectorizer.
        vectorized_text = vectorizer.transform([cleaned_text])

        # Two zero-valued placeholder columns appended after the text features.
        # NOTE(review): presumably the models were fit with two extra numeric
        # features; confirm what these columns should contain.
        additional_features = [[0, 0]]
        combined_features = hstack([vectorized_text, additional_features])

        # Map the dropdown label to the corresponding loaded model.
        available_models = {
            "Naive Bayes": nb_model,
            "Random Forest": rf_model,
        }
        if model_choice not in available_models:
            st.error("Invalid model choice.")
            st.stop()
        model = available_models[model_choice]

        # Predict using the selected model.
        predictions = model.predict(combined_features)

        try:
            # Pick the two encoded label indices according to the output rank.
            output_rank = predictions.ndim
            if output_rank not in (2, 3):
                st.error("Unexpected model output shape. Please check your model.")
                st.stop()

            if output_rank == 3:
                # Rank 3: argmax over the first and second sub-arrays.
                category_index = np.argmax(predictions[0])
                subcategory_index = np.argmax(predictions[1])
            else:
                # Rank 2: first row holds the two label indices directly.
                first_row = predictions[0]
                category_index = int(first_row[0])
                subcategory_index = int(first_row[1])

            # Decode the indices back to label names.
            predicted_category = label_encoder_category.inverse_transform([category_index])[0]
            predicted_subcategory = label_encoder_subcategory.inverse_transform([subcategory_index])[0]

            # Display results.
            st.success("Classification Results:")
            st.write(f"**Category:** {predicted_category}")
            st.write(f"**Sub-Category:** {predicted_subcategory}")

        except Exception as e:
            st.error(f"Error decoding predictions: {e}")
category_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6f1b04f9bbdb91519fa99444f601b530761b15b01d2f53e7727567973db9e4d
3
+ size 731
stop_hinglish.txt ADDED
@@ -0,0 +1,1036 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ a
2
+ aadi
3
+ aaj
4
+ aap
5
+ aapne
6
+ aata
7
+ aati
8
+ aaya
9
+ aaye
10
+ ab
11
+ abbe
12
+ abbey
13
+ abe
14
+ abhi
15
+ able
16
+ about
17
+ above
18
+ accha
19
+ according
20
+ accordingly
21
+ acha
22
+ achcha
23
+ across
24
+ actually
25
+ after
26
+ afterwards
27
+ again
28
+ against
29
+ agar
30
+ ain
31
+ aint
32
+ ain't
33
+ aisa
34
+ aise
35
+ aisi
36
+ alag
37
+ all
38
+ allow
39
+ allows
40
+ almost
41
+ alone
42
+ along
43
+ already
44
+ also
45
+ although
46
+ always
47
+ am
48
+ among
49
+ amongst
50
+ an
51
+ and
52
+ andar
53
+ another
54
+ any
55
+ anybody
56
+ anyhow
57
+ anyone
58
+ anything
59
+ anyway
60
+ anyways
61
+ anywhere
62
+ ap
63
+ apan
64
+ apart
65
+ apna
66
+ apnaa
67
+ apne
68
+ apni
69
+ appear
70
+ are
71
+ aren
72
+ arent
73
+ aren't
74
+ around
75
+ arre
76
+ as
77
+ aside
78
+ ask
79
+ asking
80
+ at
81
+ aur
82
+ avum
83
+ aya
84
+ aye
85
+ baad
86
+ baar
87
+ bad
88
+ bahut
89
+ bana
90
+ banae
91
+ banai
92
+ banao
93
+ banaya
94
+ banaye
95
+ banayi
96
+ banda
97
+ bande
98
+ bandi
99
+ bane
100
+ bani
101
+ bas
102
+ bata
103
+ batao
104
+ bc
105
+ be
106
+ became
107
+ because
108
+ become
109
+ becomes
110
+ becoming
111
+ been
112
+ before
113
+ beforehand
114
+ behind
115
+ being
116
+ below
117
+ beside
118
+ besides
119
+ best
120
+ better
121
+ between
122
+ beyond
123
+ bhai
124
+ bheetar
125
+ bhi
126
+ bhitar
127
+ bht
128
+ bilkul
129
+ bohot
130
+ bol
131
+ bola
132
+ bole
133
+ boli
134
+ bolo
135
+ bolta
136
+ bolte
137
+ bolti
138
+ both
139
+ brief
140
+ bro
141
+ btw
142
+ but
143
+ by
144
+ came
145
+ can
146
+ cannot
147
+ cant
148
+ can't
149
+ cause
150
+ causes
151
+ certain
152
+ certainly
153
+ chahiye
154
+ chaiye
155
+ chal
156
+ chalega
157
+ chhaiye
158
+ clearly
159
+ c'mon
160
+ com
161
+ come
162
+ comes
163
+ could
164
+ couldn
165
+ couldnt
166
+ couldn't
167
+ d
168
+ de
169
+ dede
170
+ dega
171
+ degi
172
+ dekh
173
+ dekha
174
+ dekhe
175
+ dekhi
176
+ dekho
177
+ denge
178
+ dhang
179
+ di
180
+ did
181
+ didn
182
+ didnt
183
+ didn't
184
+ dijiye
185
+ diya
186
+ diyaa
187
+ diye
188
+ diyo
189
+ do
190
+ does
191
+ doesn
192
+ doesnt
193
+ doesn't
194
+ doing
195
+ done
196
+ dono
197
+ dont
198
+ don't
199
+ doosra
200
+ doosre
201
+ down
202
+ downwards
203
+ dude
204
+ dunga
205
+ dungi
206
+ during
207
+ dusra
208
+ dusre
209
+ dusri
210
+ dvaara
211
+ dvara
212
+ dwaara
213
+ dwara
214
+ each
215
+ edu
216
+ eg
217
+ eight
218
+ either
219
+ ek
220
+ else
221
+ elsewhere
222
+ enough
223
+ etc
224
+ even
225
+ ever
226
+ every
227
+ everybody
228
+ everyone
229
+ everything
230
+ everywhere
231
+ ex
232
+ exactly
233
+ example
234
+ except
235
+ far
236
+ few
237
+ fifth
238
+ fir
239
+ first
240
+ five
241
+ followed
242
+ following
243
+ follows
244
+ for
245
+ forth
246
+ four
247
+ from
248
+ further
249
+ furthermore
250
+ gaya
251
+ gaye
252
+ gayi
253
+ get
254
+ gets
255
+ getting
256
+ ghar
257
+ given
258
+ gives
259
+ go
260
+ goes
261
+ going
262
+ gone
263
+ good
264
+ got
265
+ gotten
266
+ greetings
267
+ haan
268
+ had
269
+ hadd
270
+ hadn
271
+ hadnt
272
+ hadn't
273
+ hai
274
+ hain
275
+ hamara
276
+ hamare
277
+ hamari
278
+ hamne
279
+ han
280
+ happens
281
+ har
282
+ hardly
283
+ has
284
+ hasn
285
+ hasnt
286
+ hasn't
287
+ have
288
+ haven
289
+ havent
290
+ haven't
291
+ having
292
+ he
293
+ hello
294
+ help
295
+ hence
296
+ her
297
+ here
298
+ hereafter
299
+ hereby
300
+ herein
301
+ here's
302
+ hereupon
303
+ hers
304
+ herself
305
+ he's
306
+ hi
307
+ him
308
+ himself
309
+ his
310
+ hither
311
+ hm
312
+ hmm
313
+ ho
314
+ hoga
315
+ hoge
316
+ hogi
317
+ hona
318
+ honaa
319
+ hone
320
+ honge
321
+ hongi
322
+ honi
323
+ hopefully
324
+ hota
325
+ hotaa
326
+ hote
327
+ hoti
328
+ how
329
+ howbeit
330
+ however
331
+ hoyenge
332
+ hoyengi
333
+ hu
334
+ hua
335
+ hue
336
+ huh
337
+ hui
338
+ hum
339
+ humein
340
+ humne
341
+ hun
342
+ huye
343
+ huyi
344
+ i
345
+ i'd
346
+ idk
347
+ ie
348
+ if
349
+ i'll
350
+ i'm
351
+ imo
352
+ in
353
+ inasmuch
354
+ inc
355
+ inhe
356
+ inhi
357
+ inho
358
+ inka
359
+ inkaa
360
+ inke
361
+ inki
362
+ inn
363
+ inner
364
+ inse
365
+ insofar
366
+ into
367
+ inward
368
+ is
369
+ ise
370
+ isi
371
+ iska
372
+ iskaa
373
+ iske
374
+ iski
375
+ isme
376
+ isn
377
+ isne
378
+ isnt
379
+ isn't
380
+ iss
381
+ isse
382
+ issi
383
+ isski
384
+ it
385
+ it'd
386
+ it'll
387
+ itna
388
+ itne
389
+ itni
390
+ itno
391
+ its
392
+ it's
393
+ itself
394
+ ityaadi
395
+ ityadi
396
+ i've
397
+ ja
398
+ jaa
399
+ jab
400
+ jabh
401
+ jaha
402
+ jahaan
403
+ jahan
404
+ jaisa
405
+ jaise
406
+ jaisi
407
+ jata
408
+ jayega
409
+ jidhar
410
+ jin
411
+ jinhe
412
+ jinhi
413
+ jinho
414
+ jinhone
415
+ jinka
416
+ jinke
417
+ jinki
418
+ jinn
419
+ jis
420
+ jise
421
+ jiska
422
+ jiske
423
+ jiski
424
+ jisme
425
+ jiss
426
+ jisse
427
+ jitna
428
+ jitne
429
+ jitni
430
+ jo
431
+ just
432
+ jyaada
433
+ jyada
434
+ k
435
+ ka
436
+ kaafi
437
+ kab
438
+ kabhi
439
+ kafi
440
+ kaha
441
+ kahaa
442
+ kahaan
443
+ kahan
444
+ kahi
445
+ kahin
446
+ kahte
447
+ kaisa
448
+ kaise
449
+ kaisi
450
+ kal
451
+ kam
452
+ kar
453
+ kara
454
+ kare
455
+ karega
456
+ karegi
457
+ karen
458
+ karenge
459
+ kari
460
+ karke
461
+ karna
462
+ karne
463
+ karni
464
+ karo
465
+ karta
466
+ karte
467
+ karti
468
+ karu
469
+ karun
470
+ karunga
471
+ karungi
472
+ kaun
473
+ kaunsa
474
+ kayi
475
+ kch
476
+ ke
477
+ keep
478
+ keeps
479
+ keh
480
+ kehte
481
+ kept
482
+ khud
483
+ ki
484
+ kin
485
+ kine
486
+ kinhe
487
+ kinho
488
+ kinka
489
+ kinke
490
+ kinki
491
+ kinko
492
+ kinn
493
+ kino
494
+ kis
495
+ kise
496
+ kisi
497
+ kiska
498
+ kiske
499
+ kiski
500
+ kisko
501
+ kisliye
502
+ kisne
503
+ kitna
504
+ kitne
505
+ kitni
506
+ kitno
507
+ kiya
508
+ kiye
509
+ know
510
+ known
511
+ knows
512
+ ko
513
+ koi
514
+ kon
515
+ konsa
516
+ koyi
517
+ krna
518
+ krne
519
+ kuch
520
+ kuchch
521
+ kuchh
522
+ kul
523
+ kull
524
+ kya
525
+ kyaa
526
+ kyu
527
+ kyuki
528
+ kyun
529
+ kyunki
530
+ lagta
531
+ lagte
532
+ lagti
533
+ last
534
+ lately
535
+ later
536
+ le
537
+ least
538
+ lekar
539
+ lekin
540
+ less
541
+ lest
542
+ let
543
+ let's
544
+ li
545
+ like
546
+ liked
547
+ likely
548
+ little
549
+ liya
550
+ liye
551
+ ll
552
+ lo
553
+ log
554
+ logon
555
+ lol
556
+ look
557
+ looking
558
+ looks
559
+ ltd
560
+ lunga
561
+ m
562
+ maan
563
+ maana
564
+ maane
565
+ maani
566
+ maano
567
+ magar
568
+ mai
569
+ main
570
+ maine
571
+ mainly
572
+ mana
573
+ mane
574
+ mani
575
+ mano
576
+ many
577
+ mat
578
+ may
579
+ maybe
580
+ me
581
+ mean
582
+ meanwhile
583
+ mein
584
+ mera
585
+ mere
586
+ merely
587
+ meri
588
+ might
589
+ mightn
590
+ mightnt
591
+ mightn't
592
+ mil
593
+ mjhe
594
+ more
595
+ moreover
596
+ most
597
+ mostly
598
+ much
599
+ mujhe
600
+ must
601
+ mustn
602
+ mustnt
603
+ mustn't
604
+ my
605
+ myself
606
+ na
607
+ naa
608
+ naah
609
+ nahi
610
+ nahin
611
+ nai
612
+ name
613
+ namely
614
+ nd
615
+ ne
616
+ near
617
+ nearly
618
+ necessary
619
+ neeche
620
+ need
621
+ needn
622
+ neednt
623
+ needn't
624
+ needs
625
+ neither
626
+ never
627
+ nevertheless
628
+ new
629
+ next
630
+ nhi
631
+ nine
632
+ no
633
+ nobody
634
+ non
635
+ none
636
+ noone
637
+ nope
638
+ nor
639
+ normally
640
+ not
641
+ nothing
642
+ novel
643
+ now
644
+ nowhere
645
+ o
646
+ obviously
647
+ of
648
+ off
649
+ often
650
+ oh
651
+ ok
652
+ okay
653
+ old
654
+ on
655
+ once
656
+ one
657
+ ones
658
+ only
659
+ onto
660
+ or
661
+ other
662
+ others
663
+ otherwise
664
+ ought
665
+ our
666
+ ours
667
+ ourselves
668
+ out
669
+ outside
670
+ over
671
+ overall
672
+ own
673
+ par
674
+ pata
675
+ pe
676
+ pehla
677
+ pehle
678
+ pehli
679
+ people
680
+ per
681
+ perhaps
682
+ phla
683
+ phle
684
+ phli
685
+ placed
686
+ please
687
+ plus
688
+ poora
689
+ poori
690
+ provides
691
+ pura
692
+ puri
693
+ q
694
+ que
695
+ quite
696
+ raha
697
+ rahaa
698
+ rahe
699
+ rahi
700
+ rakh
701
+ rakha
702
+ rakhe
703
+ rakhen
704
+ rakhi
705
+ rakho
706
+ rather
707
+ re
708
+ really
709
+ reasonably
710
+ regarding
711
+ regardless
712
+ regards
713
+ rehte
714
+ rha
715
+ rhaa
716
+ rhe
717
+ rhi
718
+ ri
719
+ right
720
+ s
721
+ sa
722
+ saara
723
+ saare
724
+ saath
725
+ sab
726
+ sabhi
727
+ sabse
728
+ sahi
729
+ said
730
+ sakta
731
+ saktaa
732
+ sakte
733
+ sakti
734
+ same
735
+ sang
736
+ sara
737
+ sath
738
+ saw
739
+ say
740
+ saying
741
+ says
742
+ se
743
+ second
744
+ secondly
745
+ see
746
+ seeing
747
+ seem
748
+ seemed
749
+ seeming
750
+ seems
751
+ seen
752
+ self
753
+ selves
754
+ sensible
755
+ sent
756
+ serious
757
+ seriously
758
+ seven
759
+ several
760
+ shall
761
+ shan
762
+ shant
763
+ shan't
764
+ she
765
+ she's
766
+ should
767
+ shouldn
768
+ shouldnt
769
+ shouldn't
770
+ should've
771
+ si
772
+ since
773
+ six
774
+ so
775
+ soch
776
+ some
777
+ somebody
778
+ somehow
779
+ someone
780
+ something
781
+ sometime
782
+ sometimes
783
+ somewhat
784
+ somewhere
785
+ soon
786
+ still
787
+ sub
788
+ such
789
+ sup
790
+ sure
791
+ t
792
+ tab
793
+ tabh
794
+ tak
795
+ take
796
+ taken
797
+ tarah
798
+ teen
799
+ teeno
800
+ teesra
801
+ teesre
802
+ teesri
803
+ tell
804
+ tends
805
+ tera
806
+ tere
807
+ teri
808
+ th
809
+ tha
810
+ than
811
+ thank
812
+ thanks
813
+ thanx
814
+ that
815
+ that'll
816
+ thats
817
+ that's
818
+ the
819
+ theek
820
+ their
821
+ theirs
822
+ them
823
+ themselves
824
+ then
825
+ thence
826
+ there
827
+ thereafter
828
+ thereby
829
+ therefore
830
+ therein
831
+ theres
832
+ there's
833
+ thereupon
834
+ these
835
+ they
836
+ they'd
837
+ they'll
838
+ they're
839
+ they've
840
+ thi
841
+ thik
842
+ thing
843
+ think
844
+ thinking
845
+ third
846
+ this
847
+ tho
848
+ thoda
849
+ thodi
850
+ thorough
851
+ thoroughly
852
+ those
853
+ though
854
+ thought
855
+ three
856
+ through
857
+ throughout
858
+ thru
859
+ thus
860
+ tjhe
861
+ to
862
+ together
863
+ toh
864
+ too
865
+ took
866
+ toward
867
+ towards
868
+ tried
869
+ tries
870
+ true
871
+ truly
872
+ try
873
+ trying
874
+ tu
875
+ tujhe
876
+ tum
877
+ tumhara
878
+ tumhare
879
+ tumhari
880
+ tune
881
+ twice
882
+ two
883
+ um
884
+ umm
885
+ un
886
+ under
887
+ unhe
888
+ unhi
889
+ unho
890
+ unhone
891
+ unka
892
+ unkaa
893
+ unke
894
+ unki
895
+ unko
896
+ unless
897
+ unlikely
898
+ unn
899
+ unse
900
+ until
901
+ unto
902
+ up
903
+ upar
904
+ upon
905
+ us
906
+ use
907
+ used
908
+ useful
909
+ uses
910
+ usi
911
+ using
912
+ uska
913
+ uske
914
+ usne
915
+ uss
916
+ usse
917
+ ussi
918
+ usually
919
+ vaala
920
+ vaale
921
+ vaali
922
+ vahaan
923
+ vahan
924
+ vahi
925
+ vahin
926
+ vaisa
927
+ vaise
928
+ vaisi
929
+ vala
930
+ vale
931
+ vali
932
+ various
933
+ ve
934
+ very
935
+ via
936
+ viz
937
+ vo
938
+ waala
939
+ waale
940
+ waali
941
+ wagaira
942
+ wagairah
943
+ wagerah
944
+ waha
945
+ wahaan
946
+ wahan
947
+ wahi
948
+ wahin
949
+ waisa
950
+ waise
951
+ waisi
952
+ wala
953
+ wale
954
+ wali
955
+ want
956
+ wants
957
+ was
958
+ wasn
959
+ wasnt
960
+ wasn't
961
+ way
962
+ we
963
+ we'd
964
+ well
965
+ we'll
966
+ went
967
+ were
968
+ we're
969
+ weren
970
+ werent
971
+ weren't
972
+ we've
973
+ what
974
+ whatever
975
+ what's
976
+ when
977
+ whence
978
+ whenever
979
+ where
980
+ whereafter
981
+ whereas
982
+ whereby
983
+ wherein
984
+ where's
985
+ whereupon
986
+ wherever
987
+ whether
988
+ which
989
+ while
990
+ who
991
+ whoever
992
+ whole
993
+ whom
994
+ who's
995
+ whose
996
+ why
997
+ will
998
+ willing
999
+ with
1000
+ within
1001
+ without
1002
+ wo
1003
+ woh
1004
+ wohi
1005
+ won
1006
+ wont
1007
+ won't
1008
+ would
1009
+ wouldn
1010
+ wouldnt
1011
+ wouldn't
1012
+ y
1013
+ ya
1014
+ yadi
1015
+ yah
1016
+ yaha
1017
+ yahaan
1018
+ yahan
1019
+ yahi
1020
+ yahin
1021
+ ye
1022
+ yeah
1023
+ yeh
1024
+ yehi
1025
+ yes
1026
+ yet
1027
+ you
1028
+ you'd
1029
+ you'll
1030
+ your
1031
+ you're
1032
+ yours
1033
+ yourself
1034
+ yourselves
1035
+ you've
1036
+ yup
subcategory_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e4d6681741bd155873a14009177a325a6019d117a2d62cc5a04390b13223b29
3
+ size 1304
vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1960e3d951ee705ae97a7e65b130093eb0d4f152e6f70e55bcd4ed60244b406e
3
+ size 158081