詞頻直方圖
算詞頻
import nltk
# Load the Gutenberg corpus reader from NLTK and list the file ids it provides.
from nltk.corpus import gutenberg
gutenberg.fileids()
['austen-emma.txt',
'austen-persuasion.txt',
'austen-sense.txt',
'bible-kjv.txt',
'blake-poems.txt',
'bryant-stories.txt',
'burgess-busterbrown.txt',
'carroll-alice.txt',
'chesterton-ball.txt',
'chesterton-brown.txt',
'chesterton-thursday.txt',
'edgeworth-parents.txt',
'melville-moby_dick.txt',
'milton-paradise.txt',
'shakespeare-caesar.txt',
'shakespeare-hamlet.txt',
'shakespeare-macbeth.txt',
'whitman-leaves.txt']
# Pick Jane Austen's "Emma" and load it as a sequence of word tokens.
emma=nltk.corpus.gutenberg.words('austen-emma.txt')
type(emma)
nltk.corpus.reader.util.StreamBackedCorpusView
len(emma)
192427
# Average number of occurrences per distinct token:
# total token count divided by the vocabulary size.
len(emma)/len(set(emma))
24.63538599411087
len(set(emma))
7811
sorted(set(emma))  # Build the sorted vocabulary. Strings sort by character code (ASCII / Unicode code point), so punctuation, digits and uppercase come before lowercase.
['!',
'!"',
'!"--',
"!'",
"!'--",
'!)--',
'!--',
'!--"',
'!--(',
'!--`',
'"',
'"\'',
'"--',
'"`',
'&',
"'",
"'--",
"';",
'(',
')',
'),',
')--',
').',
').--',
');--',
',',
',"',
',"--',
",'",
',\'"',
',)',
',--',
',--"',
'-',
'--',
'--"',
'--(',
'--,',
'----',
'----------,',
"--------.'",
'--.',
'--."',
"--.'",
'--:',
'--`',
'.',
'."',
'."--',
".'",
'.\'"',
".'--",
".'--`",
'.)',
'.,',
'.,"',
".,'",
'.--',
'.--"',
'.--`',
'.]',
'000',
'10',
'1816',
'23rd',
'24th',
'26th',
'28th',
'7th',
'8th',
':',
':"',
':"--',
":'",
":'--",
':--',
':--"',
';',
';"',
';"--',
";'",
";'--",
';--',
';--"',
'?',
'?"',
'?"--',
'?"--"',
"?'",
'?\'"',
'?)--',
'?--',
'?--"',
'?--(',
'A',
'Abbey',
'Abbots',
'Abdy',
'Abominable',
'About',
'Absence',
'Absolute',
'Absolutely',
'Absurd',
'According',
'Accordingly',
'Acquit',
'Actually',
'Adelaide',
'Adopt',
'After',
'Agreed',
'Agricultural',
'Ah',
'Aladdin',
'Alas',
'Alderneys',
'All',
'Almane',
'Almost',
'Although',
'Altogether',
'Always',
'Am',
'Ambition',
'Amiable',
'An',
'And',
'Angry',
'Anna',
'Anne',
'Another',
'Anxious',
'Any',
'Anywhere',
'Apologies',
'Approve',
'April',
'Are',
'Arthur',
'As',
'Assured',
'Astley',
'Astonished',
'At',
'August',
'Augusta',
'Aunt',
'Austen',
'Aye',
'Bad',
'Balls',
'Baly',
'Barnes',
'Baronne',
'Bates',
'Bateses',
'Bath',
'Be',
'Bear',
'Beautiful',
'Beavers',
'Before',
'Beg',
'Behold',
'Being',
'Believe',
'Bella',
'Besides',
'Better',
'Between',
'Beyond',
'Bickerton',
'Bird',
'Birmingham',
'Birth',
'Bless',
'Blessed',
'Boarding',
'Bond',
'Books',
'Both',
'Bought',
'Box',
'Bragge',
'Bragges',
'Braithwaites',
'Break',
'Bristol',
'Broadway',
'Broadwood',
'Brother',
'Brown',
'Brunswick',
'Business',
'Busy',
'But',
'By',
'C',
'CHAPTER',
'CHARADE',
'CHURCHILL',
'Call',
'Campbell',
'Campbells',
'Can',
'Candles',
'Cannot',
'Captain',
'Caroline',
'Catherine',
'Cautious',
'Ceremonies',
'Certain',
'Certainly',
'Charming',
'Children',
'Chili',
'Christian',
'Christmas',
'Church',
'Churchill',
'Churchills',
'Chuse',
'Circumstances',
'Clara',
'Clayton',
'Clifton',
'Cobham',
'Cole',
'Coles',
'Colonel',
'Come',
'Command',
'Common',
'Compare',
'Compliments',
'Composure',
'Compressed',
'Comtesse',
'Conceive',
'Concession',
'Conjecture',
'Consider',
'Considering',
'Contrary',
'Cooper',
'Could',
'Cowper',
'Cox',
'Coxe',
'Coxes',
'Cramer',
'Cromer',
'Crown',
'DEAR',
'Dancing',
'Dating',
'Day',
'Dear',
'Dearer',
'Deceived',
'December',
'Decidedly',
'Delighted',
'Delightful',
'Depend',
'Did',
'Difference',
'Dining',
'Dinner',
'Dirty',
'Disingenuousness',
'Disputable',
'Dixon',
'Dixons',
'Do',
'Does',
'Don',
'Donwell',
'Dorking',
'Dr',
'Dreadful',
'Dublin',
'During',
'E',
'Early',
'Easter',
'Either',
'Elegant',
'Elizabeth',
'Elton',
'Eltons',
'Em',
'Emma',
'Encouragement',
'End',
'Engaged',
'England',
'English',
'Enscombe',
'Escape',
'Especially',
'Even',
'Ever',
'Every',
'Exactly',
'Excellent',
'Excellently',
'Except',
'Excepting',
'Excuse',
'Exquisite',
'Extracts',
'Extraordinary',
'Extremely',
'F',
'FINIS',
'Fairfax',
'Fancying',
'Farm',
'Farmer',
'February',
'Fetch',
'Find',
'Fine',
'Finesse',
'Five',
'For',
'Forcing',
'Ford',
'Forest',
'Former',
'Fortunate',
'Fortunately',
'Fortune',
'Four',
'Fourteen',
'Frank',
'French',
'Friday',
'From',
'Full',
'Garrick',
'General',
'Genlis',
'George',
'Gilbert',
'Gilberts',
'Give',
'Go',
'God',
'Goddard',
'Going',
'Goldsmith',
'Gone',
'Good',
'Graham',
'Grandmama',
'Grandpapa',
'Granted',
'Gratifying',
'Great',
'Green',
'Grove',
'Ha',
'Had',
'Half',
'Hall',
'Handsome',
'Hannah',
'Happier',
'Happily',
'Happy',
'Harriet',
'Harry',
'Hart_',
'Hartfield',
'Has',
'Have',
'Having',
'Hawkins',
'Hazle',
'He',
'Heaven',
'Heavens',
'Henceforward',
'Henry',
'Her',
'Here',
'Hetty',
'High',
'Highbury',
'Hill',
'Him',
'His',
'Hitherto',
'Hodges',
'Holyhead',
'How',
'However',
'Hughes',
'Hum',
'Human',
'Humph',
'Hush',
'Hymen',
'I',
'II',
'III',
'IV',
'IX',
'If',
'Ill',
'Imagine',
'Immediately',
'Impossible',
'Impropriety',
'Imprudent',
'In',
'Increase',
'Indeed',
'Indifferent',
'Indignation',
'Inn',
'Instances',
'Instead',
'Insufferable',
'Interference',
'Intimacy',
'Invite',
'Ireland',
'Irish',
'Is',
'Isabella',
'It',
'Italian',
'Its',
'JULY',
'James',
'Jane',
'January',
'Jeffereys',
'John',
'Judge',
'July',
'June',
'Just',
'K',
'Keep',
'Kindled',
'King',
'Kings',
'Kingston',
'Kitty',
'Knightley',
'Knightleys',
'La',
'Ladies',
'Lady',
'Lane',
'Langham',
'Larkins',
'Late',
'Later',
'Latterly',
'Leave',
'Let',
'Letters',
'Liable',
'Lieut',
'Like',
'Little',
'Lively',
'Living',
'London',
'Long',
'Look',
'Lord',
'Lords',
'Low',
'M',
'MADAM',
'MY',
'Ma',
'Madam',
'Madame',
'Madeira',
'Madness',
'Making',
'Man',
'Manchester',
'Manners',
'Many',
'Maple',
'March',
'Mark',
'Marriage',
'Married',
'Martin',
'Martins',
'Master',
'Matrimony',
'May',
'Me',
'Men',
'Mermaids',
'Methodical',
'Michaelmas',
'Mickleham',
'Middling',
'Midsummer',
'Might',
'Mill',
'Milmans',
'Mine',
'Miniatures',
'Miss',
'Misses',
'Mistake',
'Mistresses',
'Mitchell',
'Monday',
'More',
'Morning',
'Most',
'Mr',
'Mrs',
'Much',
'Must',
'My',
'Myself',
'Mystery',
'N',
'Name',
'Nash',
'Natural',
'Nature',
'Nay',
'Neither',
'Neptune',
'Never',
'News',
'No',
'Nobody',
'None',
'Nonsense',
'Nonsensical',
'Nor',
'Not',
'Nothing',
'November',
'Now',
'Observe',
'October',
'Of',
'Offended',
'Offices',
'Often',
'Oftentimes',
'Oh',
'On',
'Once',
'One',
'Only',
'Open',
'Or',
'Ostalis',
'Other',
'Otway',
'Otways',
'Ought',
'Our',
'Ours',
'Oxford',
'Pain',
'Papa',
'Pardon',
'Park',
'Part',
'Partridge',
'Pass',
'Patroness',
'Patty',
'Peculiarly',
'Pembroke',
'People',
'Perfect',
'Perfectly',
'Perhaps',
'Perry',
'Perrys',
'Philippics',
'Picture',
'Pilfering',
'Place',
'Plain',
'Playing',
'Pleasant',
'Pleasure',
'Poor',
'Poverty',
'Pray',
'Prejudiced',
'Presently',
'Pretty',
'Prince',
'Proof',
'Proportions',
'Put',
'Quantities',
'Quick',
'Quite',
'Randall',
'Randalls',
'Rather',
'Read',
'Real',
'Receive',
'Referring',
'Remember',
'Reports',
'Resentment',
'Respect',
'Richard',
'Richardson',
'Richmond',
'Robert',
'Romance',
'Rousing',
'S',
'Satisfied',
'Saturday',
'Saunders',
'Say',
'School',
'Scotland',
'Seats',
'See',
'Seldom',
'Selina',
'Sept',
'September',
'Serious',
'Serle',
'Service',
'Seven',
'Shakespeare',
'Shall',
'She',
'Shocking',
'Short',
'Shortly',
'Should',
'Sighs',
'Since',
'Sir',
'Six',
'Sixteen',
'Sixty',
'Skilful',
'Small',
'Smallridge',
'Smiles',
'Smith',
'Smiths',
'So',
'Soft',
'Some',
'Somebody',
'Something',
'Sometimes',
'Son',
'Soon',
'Sorrow',
'Soup',
'South',
'Square',
'St',
'Standing',
'Stay',
'Still',
'Stilton',
'Stokes',
'Stop',
'Success',
'Such',
'Suckling',
'Sucklings',
'Sunday',
'Supper',
'Suppose',
'Supposing',
'Surprizes',
'Surry',
'Swisserland',
'Take',
'Talking',
'Tan',
'Taylor',
'Tea',
'Tell',
'Ten',
'Thank',
'That',
'The',
'Their',
'Then',
'Theodore',
'There',
'These',
'They',
'Things',
'Think',
'This',
'Those',
'Though',
'Three',
'Thy',
'Till',
'Time',
'Tiresome',
'Tis',
'To',
'Tom',
'Too',
'Towards',
'Trouble',
'True',
'Trust',
'Tuesday',
'Tunbridge',
'Tupman',
'Tupmans',
'Two',
'Uncle',
'Under',
'Understanding',
'Undoubtedly',
'Ungrateful',
'Unwelcome',
'Upon',
'Used',
'V',
'VI',
'VII',
'VIII',
'VOLUME',
'Vanity',
'Venice',
'Very',
'Vicar',
'Vicarage',
'Vigorous',
'Voices',
'W',
'WESTON',
'WINDSOR',
'Waiving',
'Wakefield',
'Walk',
'Wallis',
'Wallises',
'Want',
'Warmth',
'Was',
'Wax',
'We',
'Weather',
'Wednesday',
'Welch',
'Well',
'Were',
'West',
'Weston',
'Westons',
'Weymouth',
'What',
'Whatever',
'When',
'Whenever',
'Where',
'Whether',
'Which',
'While',
'Who',
'Whoever',
'Whom',
'Why',
'Wickedness',
'Will',
'William',
'Wiltshire',
'Windsor',
'Wingfield',
'Wish',
'With',
'Within',
'Without',
'Witness',
'Woman',
'Women',
'Woodhouse',
'Woodhouses',
'Worse',
'Would',
'Wrapt',
'Wright',
'Wrong',
'X',
'XI',
'XII',
'XIII',
'XIV',
'XIX',
'XV',
'XVI',
'XVII',
'XVIII',
'Yes',
'Yet',
'York',
'Yorkshire',
'You',
'Young',
'Your',
'Yours',
'[',
']',
'_',
'_Adair_',
'_Bath_',
'_Chaperon_',
'_Courtship_',
'_Dixon_',
'_Dixons_',
'_Elton_',
'_He_',
'_Her_',
'_His_',
'_I_',
'_May_',
'_Miss_',
'_Most_',
'_Mr_',
'_Mrs',
'_Mrs_',
'_My_',
'_Now_',
'_Perfection_',
'_Philip_',
'_Rev',
'_Robin_',
'_She_',
'_Some_',
'_Taylor_',
'_The_',
'_There_',
'_We_',
'_What_',
'_White',
'_Woodhouse_',
'_You_',
'_______',
'_a_',
'_accepted_',
'_addition_',
'_all_',
'_almost_',
'_alone_',
'_amor_',
'_and_',
'_answer_',
'_any_',
'_appropriation_',
'_as_',
'_assistance_',
'_at_',
'_be_',
'_been_',
'_blunder_',
'_boiled_',
'_both_',
'_bride_',
'_broke_',
'_caro_',
'_cause_',
'_compassion_',
'_compliments_',
'_court_',
'_courtship_',
'_did_',
'_dissolved_',
'_doubts_',
'_each_',
'_eighteen_',
'_engagement_',
'_evening_',
'_felt_',
'_first_',
'_gentleman_',
'_great_',
'_greater_',
'_had_',
'_half_',
'_happily_',
'_has_',
'_have_',
'_he_',
'_her_',
'_here_',
'_him_',
'_his_',
'_home_',
'_housebreaking_',
'_introduction_',
'_invite_',
'_is_',
'_it_',
'_joint_',
'_just_',
'_lady_',
'_letting_',
'_little_',
'_man_',
'_married_',
'_marry_',
'_me_',
'_mediocre_',
'_misery_',
'_moment_',
'_more_',
'_must_',
'_my_',
'_named_',
'_names_',
'_nearer_',
'_not_',
'_now_',
'_of_',
'_one_',
'_our_',
'_own_',
'_part_',
'_particular_',
'_party_',
'_patriae_',
'_precious_',
'_present_',
'_presume_',
'_promise_',
'_purport_',
'_recollecting_',
'_refused_',
'_repentance_',
'_respect_',
'_sacrifice_',
'_say_',
'_secret_',
'_sensation_',
'_shall_',
'_she_',
'_ship_',
'_should_',
'_small_',
'_some_',
'_source_',
'_sposo_',
'_tell_',
'_ten_',
'_that_',
'_the_',
'_them_',
'_then_',
'_they_',
'_thing_',
'_think_',
'_thoughts_',
'_three_',
'_time_',
'_times_',
'_to_',
'_told_',
'_treasures_',
'_try_',
'_two_',
'_understand_',
'_unreasonable_',
'_unrequited_',
'_us_',
'_very_',
'_wanted_',
'_was_',
'_way_',
'_we_',
'_well_',
'_were_',
'_when_',
'_who_',
'_will_',
...]
# Note: FreqDist is defined in the nltk.probability module.
from nltk.probability import FreqDist
fdist1=FreqDist(emma)
fdist1 # dict-like mapping of token -> count; the goal is to split it into two parallel lists
FreqDist({'lighter': 1,
'victims': 1,
'Square': 11,
'playfully': 2,
'mortified': 8,
'fill': 2,
'wait': 22,
'watched': 8,
'vessel': 1,
'quite': 269,
'humanity': 3,
'running': 4,
'ajar': 2,
'witnessed': 4,
'exercise': 17,
'oppress': 1,
'treble': 1,
'foundation': 3,
'conversed': 1,
'sooner': 12,
'candidly': 1,
'sit': 38,
'fresh': 14,
'flock': 1,
'accomplish': 2,
'jealous': 3,
'invalid': 4,
'composedly': 2,
'thing': 398,
'improving': 1,
'settle': 12,
'thought': 226,
'popularity': 4,
'untowardly': 1,
'luxury': 4,
'tears': 9,
'disgust': 5,
'undiscerned': 1,
'parlour': 12,
'minutes': 53,
'purchasing': 1,
'conveyance': 1,
'lost': 21,
'strangest': 1,
'sending': 7,
'chained': 1,
'palatable': 1,
'invariable': 1,
'regardless': 2,
'wretched': 12,
'painful': 5,
'needless': 3,
'unavoidable': 1,
'purity': 1,
'raised': 6,
'ingeniously': 1,
'dependence': 11,
'regretted': 5,
'&': 3,
'remedy': 1,
'Forcing': 1,
'artificial': 1,
'Let': 26,
'orderly': 1,
'wearying': 1,
'Read': 2,
'instrumental': 1,
'denied': 4,
'destined': 4,
'misunderstandings': 4,
'channel': 1,
'sheet': 1,
'telling': 22,
'Brown': 1,
'broader': 2,
'keen': 1,
'on': 677,
'prominent': 3,
'Mill': 11,
'Augusta': 4,
'retire': 1,
'smallest': 20,
'requiring': 2,
'remote': 1,
'Dublin': 1,
'blow': 3,
'somewhat': 4,
'felling': 1,
'amazed': 4,
'amazingly': 2,
'tones': 1,
'uncivil': 1,
'Pleasure': 1,
'bonnet': 3,
'bid': 4,
'jokes': 1,
'method': 3,
'instruments': 1,
'unpersuadable': 3,
'wondered': 4,
'spread': 7,
'seduced': 1,
'despatched': 1,
'concise': 2,
'soon': 221,
'endeavouring': 3,
'lamented': 2,
'My': 108,
'that': 1730,
'March': 1,
'promoting': 1,
'stronger': 8,
'testify': 1,
'manner': 75,
'hero': 2,
'persuasions': 1,
'listener': 1,
'raising': 3,
'dearly': 2,
'reproached': 1,
'two': 171,
'performance': 8,
'provide': 4,
'was': 2385,
'plenty': 5,
'_wanted_': 1,
'valuable': 9,
'any': 651,
'goose': 2,
'visited': 5,
'anxiety': 19,
'subduing': 2,
'scheme': 23,
'order': 14,
'illegitimacy': 2,
'"\'': 1,
'comparatively': 2,
'Exactly': 7,
'Anxious': 1,
'Such': 49,
'imagined': 20,
'News': 1,
'Alderneys': 1,
'battle': 1,
'gardens': 7,
'dirt': 1,
'packs': 1,
'sister': 33,
'preparing': 5,
'overcome': 9,
'humoured': 6,
'enjoy': 9,
'trade': 7,
'blaming': 1,
'overheard': 2,
'lashes': 2,
'condescending': 1,
'important': 20,
'hurried': 8,
'imposing': 1,
'unpleasant': 13,
'Catherine': 1,
'ing': 1,
'relate': 5,
'_He_': 2,
'flourishing': 1,
'Baly': 1,
'arrived': 18,
':"--': 2,
'undistinguishing': 1,
'discrimination': 1,
'desultory': 1,
'mysteriously': 1,
'summoned': 1,
'_names_': 1,
'spending': 5,
'climate': 1,
'self': 23,
'respite': 1,
'grandeur': 3,
'Its': 4,
'pressing': 6,
'reasonable': 20,
'eats': 2,
'killed': 4,
'belonging': 4,
'Randall': 1,
'industry': 1,
'disperse': 1,
'heap': 1,
'impair': 1,
'Humph': 4,
'readiest': 1,
'perfectly': 65,
'bath': 1,
'softening': 2,
'turning': 24,
'sparkling': 2,
'Graham': 1,
'lieu': 1,
'contributing': 1,
'worshipping': 1,
'understood': 23,
'placing': 3,
'affairs': 5,
'Yorkshire': 8,
'_told_': 1,
'gala': 1,
'afraid': 65,
'hear': 100,
'description': 7,
'year': 28,
'uneasiness': 10,
'scouted': 1,
'haunting': 1,
'price': 2,
'excepted': 2,
'Actually': 2,
'darling': 1,
'al': 1,
'chattering': 1,
'Lane': 6,
'player': 2,
'consent': 14,
'elsewhere': 2,
'slowly': 6,
'fashioned': 3,
'determination': 5,
'residence': 4,
'blockhead': 3,
'modest': 3,
'other': 220,
'complains': 2,
'variations': 1,
'impending': 1,
'Emma': 865,
'need': 42,
'escorted': 2,
'violent': 2,
'amendment': 1,
'inspire': 1,
'An': 12,
'rice': 1,
'Presently': 2,
'gets': 4,
'Miniatures': 1,
'Some': 14,
'flame': 1,
'Abbots': 1,
'cloth': 1,
'gratitude': 29,
'chapter': 1,
'worn': 3,
'Enscombe': 36,
'shocked': 7,
'gains': 1,
'escaped': 7,
'felicities': 2,
'pretty': 66,
'revealed': 2,
'crosser': 1,
'corroborating': 1,
'reminding': 2,
'exclaiming': 4,
'circumspection': 1,
'guinea': 1,
'as': 1387,
'contemplating': 1,
'cordially': 4,
'event': 24,
'_your_': 5,
'counsellor': 2,
'oppression': 1,
'survey': 1,
'Wakefield': 1,
'diet': 1,
'forward': 36,
'piece': 13,
'quarter': 25,
'Gilberts': 1,
'mortify': 1,
'complimenter': 1,
'rousing': 2,
'wheres': 1,
'petticoat': 2,
'_small_': 1,
'desperate': 3,
'engaging': 7,
'sense': 56,
'decease': 2,
'notice': 27,
'adversary': 1,
'privileged': 2,
'little': 354,
'late': 26,
'Smiths': 1,
'_home_': 2,
'slackened': 1,
'unprepared': 2,
'breath': 3,
'fits': 1,
'business': 54,
'mimic': 1,
'illnesses': 3,
'formal': 4,
'Stop': 2,
'distinguished': 3,
'poignant': 2,
'expressed': 10,
'procuring': 5,
'precedes': 1,
'butcher': 2,
'Under': 2,
'sick': 10,
'ware': 1,
'l': 2,
'alphabets': 2,
':"': 2,
'charges': 1,
'fondly': 4,
'pencilled': 1,
'occurs': 1,
'periods': 1,
'whoever': 2,
'thorough': 12,
'labour': 4,
'borrow': 1,
'lengths': 2,
'resources': 10,
'known': 60,
'tea': 24,
'syllable': 10,
'waverings': 1,
'explained': 3,
'needed': 2,
'quarto': 1,
'attractive': 2,
'obstinate': 1,
'_and_': 1,
'gallant': 11,
'February': 7,
'sees': 3,
'mock': 1,
'calmly': 3,
'suffering': 16,
'begs': 1,
'auspices': 1,
'solemnly': 1,
'engrosses': 1,
'mode': 1,
'vanity': 13,
'pales': 1,
'unhealthy': 2,
'emotion': 3,
'surmises': 1,
'militia': 2,
'undesirable': 1,
'first': 209,
'talks': 4,
'possible': 84,
'opportunities': 2,
'_both_': 1,
'!': 549,
'associates': 2,
'missing': 3,
'unseen': 3,
'came': 119,
'close': 18,
'since': 63,
'Coles': 17,
'leniently': 1,
'warmly': 17,
'improvidently': 1,
'hoped': 43,
'misinterpreted': 2,
'drawing': 30,
'intends': 1,
'canvassing': 1,
'Understanding': 2,
'unfeelingly': 1,
'sent': 33,
'pools': 1,
'Bristol': 8,
'line': 16,
'stop': 13,
'disappoint': 3,
'sunk': 10,
'alteration': 2,
'8th': 1,
'cheeks': 4,
'defined': 1,
'ascending': 1,
'living': 17,
'clinging': 1,
'tranquil': 4,
'Robert': 32,
'whichever': 1,
'depended': 8,
'predict': 2,
'youngest': 4,
'comprised': 1,
'visit': 86,
'whole': 76,
'fairy': 3,
'certainty': 10,
'communication': 9,
'surely': 3,
'privileges': 1,
'shoulders': 3,
'End': 8,
'whispers': 1,
'softer': 3,
'prime': 1,
'occupation': 4,
'Forest': 2,
'Hartfield': 160,
'gifted': 1,
'Somebody': 5,
'_would_': 1,
'bleak': 1,
'unmirthful': 1,
'agreeably': 5,
'portion': 3,
'camp': 1,
'timidity': 1,
'opposition': 2,
'insensibility': 2,
'unemployed': 1,
'successively': 1,
'creditably': 1,
'filled': 2,
'malt': 1,
'bare': 2,
'fireside': 4,
'?\'"': 2,
'evinced': 1,
'raptures': 5,
'delegating': 1,
'service': 22,
'panic': 2,
'discoveries': 4,
'food': 3,
'analogy': 1,
'comprehend': 15,
'_bride_': 1,
'who': 281,
'glimpse': 4,
'faith': 5,
'hastening': 1,
'disposition': 24,
'concealment': 11,
'VOLUME': 3,
'expressions': 8,
'shoes': 4,
'sashed': 2,
'rush': 2,
'enduring': 1,
'idea': 100,
'portfolio': 1,
'limb': 1,
'illiterate': 2,
'Soon': 3,
'appreciating': 1,
'Wrapt': 1,
'drown': 1,
'estate': 4,
'reassembled': 1,
'apartment': 2,
'possess': 2,
'_each_': 1,
'side': 71,
'else': 80,
'dissipate': 1,
'woollen': 1,
'Weather': 1,
'readily': 7,
'folding': 1,
'thoughts': 38,
'language': 9,
'deplore': 2,
'coldest': 1,
'Granted': 1,
'separations': 1,
'infection': 3,
'Easter': 2,
'Son': 1,
'changed': 6,
'uncouthness': 1,
'appendages': 1,
'teachers': 3,
'disapprobation': 1,
'_we_': 6,
'Anne': 3,
'rightly': 4,
'Alas': 3,
'engrossing': 1,
'restrained': 2,
'concert': 1,
'main': 2,
'pointing': 1,
'matrimony': 7,
'benevolent': 2,
'wrist': 1,
'eldest': 8,
'governed': 2,
'unfelt': 1,
'desirable': 18,
'extricated': 1,
'praising': 1,
'experienced': 1,
'stain': 2,
'Absence': 1,
'streets': 1,
'anticipation': 4,
'directly': 51,
'haberdasher': 1,
'pays': 4,
'accord': 1,
'more': 464,
'discordancies': 1,
'transfer': 2,
'spoiled': 4,
'presumption': 7,
'Master': 1,
'Uncle': 2,
'trophies': 1,
'fetch': 6,
'aye': 2,
'want': 89,
'undesigned': 1,
'basis': 2,
'ringing': 1,
'dignities': 1,
'confers': 1,
'retort': 1,
'hungry': 3,
'entitled': 3,
'however': 114,
'insane': 1,
'forms': 1,
'escape': 14,
'politeness': 7,
'second': 31,
'throat': 12,
'grandpapas': 1,
'lonely': 1,
'spurn': 1,
'coachman': 5,
'unexceptionably': 1,
'played': 8,
'discover': 4,
'honestly': 3,
'discretion': 4,
'quarrelsome': 1,
'cautiousness': 1,
'courageous': 1,
'Immediately': 1,
'dissuade': 3,
'succeeded': 11,
'combine': 1,
'Certainly': 11,
'_unrequited_': 1,
'avail': 2,
'bailiff': 1,
'finger': 3,
'constitution': 7,
'madness': 4,
'capable': 6,
'chusing': 2,
'comforts': 8,
'downstairs': 4,
'sullenness': 1,
'discussed': 1,
'black': 5,
'temporary': 2,
'gentlewoman': 2,
'represent': 1,
'brunt': 1,
'strikingly': 1,
'watering': 3,
'shut': 9,
'argumentative': 1,
'stretch': 2,
'instigator': 1,
'feet': 1,
'dumplings': 1,
'composure': 7,
'league': 1,
'knowledge': 26,
'effectually': 3,
'rapturous': 1,
'room': 117,
'prodigy': 2,
'achievement': 1,
'support': 8,
'plainly': 6,
'buyings': 1,
'fence': 1,
'varying': 1,
'testifying': 1,
').': 4,
'unsuspected': 2,
'parsnip': 1,
'breeding': 2,
'cheerfulness': 6,
'down': 70,
'amounted': 1,
'marked': 3,
'smooth': 7,
'sharer': 1,
'than': 415,
'glorious': 1,
'unnecessary': 8,
'deal': 92,
'transition': 3,
'eyeing': 2,
'rest': 50,
'fathomed': 1,
'baby': 6,
'summer': 23,
'attends': 1,
'chuses': 4,
'critical': 2,
'doom': 1,
'precisely': 7,
'calculated': 3,
'shewed': 17,
'dwelt': 4,
'expected': 40,
'_My_': 2,
'Offices': 1,
'Surry': 9,
'whispered': 3,
'arms': 2,
'supposes': 1,
'opens': 1,
'journey': 12,
'supposed': 35,
'cheap': 2,
'drain': 1,
'ball': 31,
'grant': 3,
'happening': 4,
'inroads': 1,
'popular': 1,
'unconcerned': 2,
'pursued': 1,
'Beg': 1,
'behind': 19,
'push': 1,
'wonderful': 13,
'enters': 1,
'formation': 1,
'astonish': 1,
'deference': 2,
'detached': 1,
'deathbed': 1,
'moderate': 7,
'repast': 1,
'drawn': 15,
'involuntary': 1,
'!--"': 1,
'Absurd': 1,
'eat': 12,
'readier': 1,
'productive': 1,
'personally': 1,
'fortitude': 4,
'sympathise': 1,
'improve': 7,
'pleases': 3,
'rise': 5,
'pork': 11,
'pursue': 1,
'languor': 2,
'travels': 1,
'Pray': 15,
'Children': 1,
'confessing': 4,
'sets': 1,
'daringly': 1,
'trick': 5,
'notions': 6,
'seriously': 16,
'dropt': 4,
'containing': 5,
'lessened': 2,
'exaggeration': 1,
'couplet': 1,
'solace': 2,
'Tuesday': 7,
'Till': 7,
'capricious': 1,
'gracious': 3,
'_two_': 1,
'timed': 1,
'disagree': 4,
'Pass': 2,
'bursts': 1,
'multiplied': 2,
'pretended': 1,
'reluctant': 1,
'intreat': 1,
'returned': 39,
'pre': 1,
'insufferable': 6,
'boy': 13,
'carried': 5,
'papas': 1,
'comforted': 3,
'prosperity': 3,
'state': 56,
'intelligence': 13,
'gaiters': 1,
'Ours': 1,
'underrated': 2,
'mark': 3,
'veils': 1,
'renewal': 3,
'carelessness': 1,
'former': 13,
'mixture': 8,
'pert': 3,
'inevitably': 2,
"!'": 6,
'_mediocre_': 1,
'asserted': 2,
'within': 29,
'how': 263,
'indistinct': 1,
'would': 815,
'having': 145,
'Ireland': 14,
'washing': 1,
'resulting': 1,
'obligation': 5,
'sacks': 1,
'supply': 9,
'Bates': 148,
'turnips': 1,
'considerable': 17,
'thereabouts': 1,
'call': 41,
'rencontre': 3,
'quickness': 9,
'interest': 47,
'hindrance': 1,
'yield': 4,
'fold': 1,
'made': 199,
'tone': 24,
'increase': 12,
'sleek': 1,
'attorney': 1,
'unfit': 4,
'apples': 15,
'privation': 1,
'pressingly': 1,
'for': 1321,
'evening': 96,
'from': 535,
'served': 1,
'pop': 1,
'repugnance': 1,
'intimates': 2,
'Knightley': 389,
'peculiar': 7,
'talking': 52,
'apprehensively': 1,
'heated': 3,
'Assured': 1,
'draw': 10,
'bride': 13,
'satisfactions': 1,
'recovered': 12,
'required': 14,
'spot': 7,
'Campbells': 26,
'welcome': 16,
'pack': 1,
'beet': 1,
'Astley': 4,
'pitifullest': 1,
'observing': 8,
'confession': 4,
'strong': 44,
'agriculture': 1,
'give': 157,
'Christian': 2,
'splendour': 2,
'distinct': 10,
'disgusting': 2,
'neighbourhood': 15,
'doer': 1,
'packet': 1,
'Compliments': 1,
'differing': 1,
'consciously': 1,
'note': 23,
'come': 159,
'nervously': 1,
'manage': 5,
'plain': 22,
'weight': 4,
'notes': 1,
'sale': 2,
'absent': 10,
'dreams': 1,
'announce': 7,
'curled': 1,
'How': 108,
'Has': 7,
'bewildered': 3,
'acting': 8,
'emulate': 1,
'childhood': 2,
'gave': 54,
'recurrence': 2,
'_Rev': 1,
'neglect': 6,
'accent': 5,
'cabbage': 1,
'_well_': 2,
'great': 263,
'heal': 2,
'inclination': 21,
'token': 1,
'departed': 1,
'hesitating': 4,
'edition': 1,
'buy': 1,
'consult': 5,
'consideration': 24,
'compliments': 14,
'fondness': 2,
'merest': 1,
'unkind': 1,
'stoop': 2,
'broad': 7,
'Used': 1,
'lending': 1,
'pantry': 1,
'attained': 1,
'services': 1,
'Happy': 3,
'disagreement': 3,
'sincere': 5,
'guessing': 5,
'confinement': 2,
'introductions': 1,
'mercy': 3,
'plentiful': 1,
'conceived': 3,
'ostentation': 1,
'tete': 8,
'foresaw': 3,
'listen': 18,
'contrasted': 1,
'anywhere': 15,
'hurrying': 9,
'throughout': 4,
'remain': 14,
'kept': 22,
'vouchsafed': 1,
'demerits': 1,
'ceremony': 7,
'nor': 63,
'kinder': 5,
'deduction': 2,
'Two': 7,
'dependent': 3,
'free': 5,
'yellow': 3,
'age': 22,
'atonement': 2,
'regrets': 6,
'salted': 2,
'recollection': 8,
'along': 4,
'entreated': 4,
'partial': 7,
'travelling': 5,
'passes': 3,
'Donwell': 49,
'farmer': 8,
'delightfully': 8,
'spectacles': 9,
'amidst': 1,
'sketches': 2,
'_very_': 10,
'despoiling': 1,
'mutual': 5,
'man': 233,
'hesitatingly': 3,
";'--": 1,
'kindness': 40,
'compassionate': 4,
'office': 16,
'unbecoming': 1,
'celibacy': 1,
'work': 22,
'themselves': 40,
'_gentleman_': 1,
'inevitable': 4,
'begging': 1,
'separation': 4,
'narrower': 1,
'dryly': 2,
'disinclination': 2,
'lives': 7,
'_repentance_': 1,
'studied': 2,
'objected': 2,
'remonstrance': 2,
'denial': 4,
'Woman': 2,
'patiently': 1,
'fresco': 1,
'names': 3,
'accommodations': 2,
'proportion': 4,
'recollected': 3,
'restoring': 1,
'characters': 2,
'Fairfax': 241,
'amiable': 34,
'saving': 2,
'failures': 1,
'permitted': 4,
'admirable': 3,
'selfish': 5,
'Standing': 1,
'_wish_': 1,
'report': 10,
'quick': 22,
'lists': 2,
'mama': 3,
'chatter': 1,
'pangs': 1,
'roast': 4,
'discipline': 2,
'cooler': 4,
'anticipated': 6,
'Wiltshire': 1,
'henceforth': 1,
'broken': 3,
'civilities': 9,
'damp': 6,
'fire': 16,
'views': 17,
'conversation': 42,
'governess': 9,
".'": 23,
'alleviations': 2,
'XVIII': 3,
'parent': 2,
'_great_': 1,
'moral': 1,
'decision': 8,
'accustomed': 2,
'ignorance': 4,
'pride': 18,
'decisive': 5,
'disposed': 21,
'thanked': 7,
'led': 25,
'Hazle': 1,
'river': 3,
'protesting': 1,
'assurance': 8,
'sort': 112,
'freedom': 3,
'prosing': 1,
'tires': 1,
'Supper': 1,
'either': 61,
'vexation': 6,
'horrible': 5,
'tremble': 2,
'saddle': 1,
'ages': 2,
'calm': 5,
'unwillingness': 4,
'objects': 7,
'Dinner': 2,
'butler': 2,
'traffic': 1,
'Those': 8,
'attitude': 3,
'universally': 4,
'sleety': 1,
'fancying': 12,
'Other': 1,
'influenced': 2,
'remains': 5,
'Hetty': 2,
'Mitchell': 1,
'warmth': 11,
'too': 253,
'Sixty': 1,
...})
# Split the dict-like frequency distribution into two parallel lists:
# `lebal` holds the tokens and `quant` holds the matching counts, so that
# quant[i] is the number of occurrences of lebal[i].
# NOTE(review): `lebal` looks like a misspelling of "label"; the name is kept
# unchanged in case other cells refer to it.
lebal = list(fdist1)
quant = [fdist1[word] for word in lebal]
quant
sorted(quant,reverse = True)
[11454,
6928,
5183,
4844,
4672,
4279,
3178,
3004,
2385,
2381,
2199,
2128,
2118,
2101,
2004,
1970,
1778,
1730,
1677,
1606,
1387,
1382,
1365,
1321,
1301,
1220,
1187,
1153,
1151,
1148,
1138,
1088,
1007,
997,
933,
924,
865,
835,
825,
815,
759,
758,
699,
685,
677,
651,
619,
616,
592,
591,
580,
574,
564,
564,
562,
559,
558,
552,
549,
535,
506,
490,
484,
478,
464,
452,
447,
441,
440,
439,
434,
432,
422,
421,
420,
418,
415,
413,
400,
398,
398,
392,
389,
385,
380,
375,
366,
357,
356,
354,
347,
340,
338,
337,
337,
335,
327,
322,
315,
313,
312,
308,
303,
301,
301,
297,
293,
281,
279,
273,
272,
270,
269,
263,
263,
260,
253,
248,
246,
243,
243,
243,
241,
237,
235,
233,
230,
226,
224,
223,
221,
220,
220,
219,
217,
213,
212,
212,
211,
209,
208,
207,
204,
200,
199,
199,
193,
190,
190,
190,
189,
185,
181,
177,
174,
171,
169,
166,
163,
160,
159,
159,
157,
155,
153,
152,
150,
148,
148,
146,
145,
145,
145,
144,
144,
143,
142,
141,
140,
139,
138,
138,
134,
133,
133,
133,
133,
130,
129,
129,
129,
129,
128,
126,
125,
125,
125,
124,
122,
122,
120,
120,
119,
119,
118,
118,
117,
117,
116,
116,
116,
114,
114,
113,
113,
112,
110,
110,
109,
109,
108,
108,
108,
108,
106,
106,
106,
106,
105,
102,
102,
102,
102,
101,
100,
100,
99,
99,
98,
97,
97,
96,
96,
95,
95,
95,
94,
94,
93,
92,
92,
92,
92,
91,
90,
90,
90,
90,
89,
89,
89,
89,
89,
89,
88,
88,
88,
88,
87,
87,
86,
86,
85,
85,
85,
85,
85,
84,
83,
83,
82,
82,
82,
81,
81,
81,
81,
80,
80,
80,
80,
79,
79,
78,
77,
77,
76,
76,
76,
76,
75,
75,
74,
73,
73,
72,
72,
72,
72,
71,
71,
71,
71,
71,
71,
70,
70,
70,
69,
69,
69,
69,
68,
68,
68,
68,
68,
67,
67,
67,
67,
66,
66,
66,
66,
66,
65,
65,
65,
65,
65,
64,
64,
64,
64,
64,
64,
63,
63,
63,
63,
63,
63,
63,
63,
62,
62,
61,
61,
61,
61,
61,
61,
61,
61,
60,
60,
60,
60,
60,
60,
60,
59,
59,
59,
59,
59,
59,
59,
59,
59,
58,
58,
57,
57,
57,
56,
56,
56,
56,
56,
56,
56,
56,
56,
55,
55,
55,
55,
54,
54,
54,
54,
54,
54,
54,
53,
53,
53,
53,
52,
52,
52,
52,
51,
51,
51,
51,
51,
51,
51,
51,
51,
51,
51,
51,
51,
51,
51,
50,
50,
50,
50,
50,
50,
50,
49,
49,
49,
49,
49,
49,
49,
48,
48,
48,
48,
48,
48,
47,
47,
47,
47,
47,
47,
47,
47,
46,
46,
46,
46,
46,
46,
46,
46,
46,
46,
46,
45,
45,
45,
45,
45,
45,
44,
44,
44,
44,
44,
44,
44,
43,
43,
43,
43,
43,
43,
42,
42,
42,
42,
42,
42,
41,
41,
41,
41,
41,
41,
41,
41,
41,
40,
40,
40,
40,
40,
40,
40,
40,
40,
40,
40,
40,
40,
39,
39,
39,
39,
39,
39,
39,
39,
39,
39,
39,
39,
39,
39,
39,
39,
39,
38,
38,
38,
38,
38,
38,
38,
38,
38,
38,
38,
38,
38,
38,
38,
37,
37,
37,
37,
37,
37,
37,
37,
37,
37,
37,
36,
36,
36,
36,
36,
36,
36,
36,
36,
36,
36,
36,
36,
35,
35,
35,
35,
35,
35,
35,
34,
34,
34,
34,
34,
34,
34,
34,
34,
34,
33,
33,
33,
33,
33,
33,
33,
33,
33,
32,
32,
32,
32,
32,
32,
32,
32,
32,
32,
32,
32,
32,
32,
31,
31,
31,
31,
31,
31,
31,
31,
31,
31,
31,
31,
31,
31,
31,
31,
31,
31,
31,
31,
31,
30,
30,
30,
30,
30,
30,
30,
30,
30,
30,
30,
30,
30,
30,
30,
30,
30,
30,
29,
29,
29,
29,
29,
29,
29,
29,
29,
29,
29,
29,
29,
29,
29,
29,
29,
28,
28,
28,
28,
28,
28,
28,
28,
28,
28,
28,
28,
28,
28,
27,
27,
27,
27,
27,
27,
27,
27,
27,
27,
27,
27,
27,
27,
27,
27,
27,
27,
27,
26,
26,
26,
26,
26,
26,
26,
26,
26,
26,
26,
26,
26,
26,
26,
26,
26,
26,
26,
26,
26,
26,
26,
25,
25,
25,
25,
25,
25,
25,
25,
25,
25,
25,
25,
25,
25,
25,
25,
25,
25,
25,
25,
25,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
24,
23,
23,
23,
23,
23,
23,
23,
23,
23,
23,
23,
23,
23,
23,
23,
23,
23,
23,
23,
23,
23,
23,
23,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
22,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
21,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
19,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
18,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
17,
...]
畫圖
import matplotlib as mpl
import matplotlib.pyplot as plt
import pylab as pl
%pylab inline
Populating the interactive namespace from numpy and matplotlib
# pl.hist accepts any array-like input, so converting to an ndarray is optional;
# the conversion is kept so that `quant` is an ndarray from here on.
# NOTE: `np` is provided by the `%pylab inline` magic executed above.
quant = np.array(quant)
pl.figure(figsize=(8, 8))
# 100 equal-width bins over the whole count range. Counts run from 1 up to
# 11454, so nearly every token falls into the first bin.
pl.hist(quant, bins=100)
# show must be *called*: the bare attribute `pl.show` only evaluates to the
# function object and does not render anything.
pl.show()
pl.hist?
直方圖會幾乎全部擠在最左邊,是因爲詞頻的極差非常大(最小 1,最大 11454),等寬分箱無法區分大量的低頻詞;改畫累積詞頻(累計出現次數)分佈更能說明問題。另外,matplotlib 值得認真學習。
# Open a wide 15x8 figure, then draw the cumulative counts of the
# 50 most frequent tokens.
pl.figure(figsize=(15, 8))
fdist1.plot(50, cumulative=True)
出現次數最多的前 50 個詞,就佔了全文 192427 個詞例的大約一半!