詞頻直方圖

詞頻直方圖

算詞頻

import nltk
# Load the Gutenberg corpus reader from NLTK's corpus package.
from nltk.corpus import gutenberg
# List the file identifiers of the texts available in this corpus.
gutenberg.fileids()
['austen-emma.txt',
 'austen-persuasion.txt',
 'austen-sense.txt',
 'bible-kjv.txt',
 'blake-poems.txt',
 'bryant-stories.txt',
 'burgess-busterbrown.txt',
 'carroll-alice.txt',
 'chesterton-ball.txt',
 'chesterton-brown.txt',
 'chesterton-thursday.txt',
 'edgeworth-parents.txt',
 'melville-moby_dick.txt',
 'milton-paradise.txt',
 'shakespeare-caesar.txt',
 'shakespeare-hamlet.txt',
 'shakespeare-macbeth.txt',
 'whitman-leaves.txt']
# Select Jane Austen's "Emma" and load it as a sequence of word tokens.
emma=nltk.corpus.gutenberg.words('austen-emma.txt')
# Inspect the type of the returned object (a corpus view object, not a plain list).
type(emma)
nltk.corpus.reader.util.StreamBackedCorpusView
# Total number of tokens in the text; punctuation tokens are counted as well.
len(emma)
192427
# Average number of times each distinct token is used
# (total token count divided by the number of distinct tokens).
len(emma)/len(set(emma))
24.63538599411087
# Vocabulary size: the number of distinct tokens. The set is case-sensitive
# and includes punctuation tokens.
len(set(emma))
7811
# Build the sorted vocabulary list. Strings are compared by Unicode code point
# (identical to ASCII order for these tokens), so uppercase letters sort
# before lowercase ones.
sorted(set(emma))
['!',
 '!"',
 '!"--',
 "!'",
 "!'--",
 '!)--',
 '!--',
 '!--"',
 '!--(',
 '!--`',
 '"',
 '"\'',
 '"--',
 '"`',
 '&',
 "'",
 "'--",
 "';",
 '(',
 ')',
 '),',
 ')--',
 ').',
 ').--',
 ');--',
 ',',
 ',"',
 ',"--',
 ",'",
 ',\'"',
 ',)',
 ',--',
 ',--"',
 '-',
 '--',
 '--"',
 '--(',
 '--,',
 '----',
 '----------,',
 "--------.'",
 '--.',
 '--."',
 "--.'",
 '--:',
 '--`',
 '.',
 '."',
 '."--',
 ".'",
 '.\'"',
 ".'--",
 ".'--`",
 '.)',
 '.,',
 '.,"',
 ".,'",
 '.--',
 '.--"',
 '.--`',
 '.]',
 '000',
 '10',
 '1816',
 '23rd',
 '24th',
 '26th',
 '28th',
 '7th',
 '8th',
 ':',
 ':"',
 ':"--',
 ":'",
 ":'--",
 ':--',
 ':--"',
 ';',
 ';"',
 ';"--',
 ";'",
 ";'--",
 ';--',
 ';--"',
 '?',
 '?"',
 '?"--',
 '?"--"',
 "?'",
 '?\'"',
 '?)--',
 '?--',
 '?--"',
 '?--(',
 'A',
 'Abbey',
 'Abbots',
 'Abdy',
 'Abominable',
 'About',
 'Absence',
 'Absolute',
 'Absolutely',
 'Absurd',
 'According',
 'Accordingly',
 'Acquit',
 'Actually',
 'Adelaide',
 'Adopt',
 'After',
 'Agreed',
 'Agricultural',
 'Ah',
 'Aladdin',
 'Alas',
 'Alderneys',
 'All',
 'Almane',
 'Almost',
 'Although',
 'Altogether',
 'Always',
 'Am',
 'Ambition',
 'Amiable',
 'An',
 'And',
 'Angry',
 'Anna',
 'Anne',
 'Another',
 'Anxious',
 'Any',
 'Anywhere',
 'Apologies',
 'Approve',
 'April',
 'Are',
 'Arthur',
 'As',
 'Assured',
 'Astley',
 'Astonished',
 'At',
 'August',
 'Augusta',
 'Aunt',
 'Austen',
 'Aye',
 'Bad',
 'Balls',
 'Baly',
 'Barnes',
 'Baronne',
 'Bates',
 'Bateses',
 'Bath',
 'Be',
 'Bear',
 'Beautiful',
 'Beavers',
 'Before',
 'Beg',
 'Behold',
 'Being',
 'Believe',
 'Bella',
 'Besides',
 'Better',
 'Between',
 'Beyond',
 'Bickerton',
 'Bird',
 'Birmingham',
 'Birth',
 'Bless',
 'Blessed',
 'Boarding',
 'Bond',
 'Books',
 'Both',
 'Bought',
 'Box',
 'Bragge',
 'Bragges',
 'Braithwaites',
 'Break',
 'Bristol',
 'Broadway',
 'Broadwood',
 'Brother',
 'Brown',
 'Brunswick',
 'Business',
 'Busy',
 'But',
 'By',
 'C',
 'CHAPTER',
 'CHARADE',
 'CHURCHILL',
 'Call',
 'Campbell',
 'Campbells',
 'Can',
 'Candles',
 'Cannot',
 'Captain',
 'Caroline',
 'Catherine',
 'Cautious',
 'Ceremonies',
 'Certain',
 'Certainly',
 'Charming',
 'Children',
 'Chili',
 'Christian',
 'Christmas',
 'Church',
 'Churchill',
 'Churchills',
 'Chuse',
 'Circumstances',
 'Clara',
 'Clayton',
 'Clifton',
 'Cobham',
 'Cole',
 'Coles',
 'Colonel',
 'Come',
 'Command',
 'Common',
 'Compare',
 'Compliments',
 'Composure',
 'Compressed',
 'Comtesse',
 'Conceive',
 'Concession',
 'Conjecture',
 'Consider',
 'Considering',
 'Contrary',
 'Cooper',
 'Could',
 'Cowper',
 'Cox',
 'Coxe',
 'Coxes',
 'Cramer',
 'Cromer',
 'Crown',
 'DEAR',
 'Dancing',
 'Dating',
 'Day',
 'Dear',
 'Dearer',
 'Deceived',
 'December',
 'Decidedly',
 'Delighted',
 'Delightful',
 'Depend',
 'Did',
 'Difference',
 'Dining',
 'Dinner',
 'Dirty',
 'Disingenuousness',
 'Disputable',
 'Dixon',
 'Dixons',
 'Do',
 'Does',
 'Don',
 'Donwell',
 'Dorking',
 'Dr',
 'Dreadful',
 'Dublin',
 'During',
 'E',
 'Early',
 'Easter',
 'Either',
 'Elegant',
 'Elizabeth',
 'Elton',
 'Eltons',
 'Em',
 'Emma',
 'Encouragement',
 'End',
 'Engaged',
 'England',
 'English',
 'Enscombe',
 'Escape',
 'Especially',
 'Even',
 'Ever',
 'Every',
 'Exactly',
 'Excellent',
 'Excellently',
 'Except',
 'Excepting',
 'Excuse',
 'Exquisite',
 'Extracts',
 'Extraordinary',
 'Extremely',
 'F',
 'FINIS',
 'Fairfax',
 'Fancying',
 'Farm',
 'Farmer',
 'February',
 'Fetch',
 'Find',
 'Fine',
 'Finesse',
 'Five',
 'For',
 'Forcing',
 'Ford',
 'Forest',
 'Former',
 'Fortunate',
 'Fortunately',
 'Fortune',
 'Four',
 'Fourteen',
 'Frank',
 'French',
 'Friday',
 'From',
 'Full',
 'Garrick',
 'General',
 'Genlis',
 'George',
 'Gilbert',
 'Gilberts',
 'Give',
 'Go',
 'God',
 'Goddard',
 'Going',
 'Goldsmith',
 'Gone',
 'Good',
 'Graham',
 'Grandmama',
 'Grandpapa',
 'Granted',
 'Gratifying',
 'Great',
 'Green',
 'Grove',
 'Ha',
 'Had',
 'Half',
 'Hall',
 'Handsome',
 'Hannah',
 'Happier',
 'Happily',
 'Happy',
 'Harriet',
 'Harry',
 'Hart_',
 'Hartfield',
 'Has',
 'Have',
 'Having',
 'Hawkins',
 'Hazle',
 'He',
 'Heaven',
 'Heavens',
 'Henceforward',
 'Henry',
 'Her',
 'Here',
 'Hetty',
 'High',
 'Highbury',
 'Hill',
 'Him',
 'His',
 'Hitherto',
 'Hodges',
 'Holyhead',
 'How',
 'However',
 'Hughes',
 'Hum',
 'Human',
 'Humph',
 'Hush',
 'Hymen',
 'I',
 'II',
 'III',
 'IV',
 'IX',
 'If',
 'Ill',
 'Imagine',
 'Immediately',
 'Impossible',
 'Impropriety',
 'Imprudent',
 'In',
 'Increase',
 'Indeed',
 'Indifferent',
 'Indignation',
 'Inn',
 'Instances',
 'Instead',
 'Insufferable',
 'Interference',
 'Intimacy',
 'Invite',
 'Ireland',
 'Irish',
 'Is',
 'Isabella',
 'It',
 'Italian',
 'Its',
 'JULY',
 'James',
 'Jane',
 'January',
 'Jeffereys',
 'John',
 'Judge',
 'July',
 'June',
 'Just',
 'K',
 'Keep',
 'Kindled',
 'King',
 'Kings',
 'Kingston',
 'Kitty',
 'Knightley',
 'Knightleys',
 'La',
 'Ladies',
 'Lady',
 'Lane',
 'Langham',
 'Larkins',
 'Late',
 'Later',
 'Latterly',
 'Leave',
 'Let',
 'Letters',
 'Liable',
 'Lieut',
 'Like',
 'Little',
 'Lively',
 'Living',
 'London',
 'Long',
 'Look',
 'Lord',
 'Lords',
 'Low',
 'M',
 'MADAM',
 'MY',
 'Ma',
 'Madam',
 'Madame',
 'Madeira',
 'Madness',
 'Making',
 'Man',
 'Manchester',
 'Manners',
 'Many',
 'Maple',
 'March',
 'Mark',
 'Marriage',
 'Married',
 'Martin',
 'Martins',
 'Master',
 'Matrimony',
 'May',
 'Me',
 'Men',
 'Mermaids',
 'Methodical',
 'Michaelmas',
 'Mickleham',
 'Middling',
 'Midsummer',
 'Might',
 'Mill',
 'Milmans',
 'Mine',
 'Miniatures',
 'Miss',
 'Misses',
 'Mistake',
 'Mistresses',
 'Mitchell',
 'Monday',
 'More',
 'Morning',
 'Most',
 'Mr',
 'Mrs',
 'Much',
 'Must',
 'My',
 'Myself',
 'Mystery',
 'N',
 'Name',
 'Nash',
 'Natural',
 'Nature',
 'Nay',
 'Neither',
 'Neptune',
 'Never',
 'News',
 'No',
 'Nobody',
 'None',
 'Nonsense',
 'Nonsensical',
 'Nor',
 'Not',
 'Nothing',
 'November',
 'Now',
 'Observe',
 'October',
 'Of',
 'Offended',
 'Offices',
 'Often',
 'Oftentimes',
 'Oh',
 'On',
 'Once',
 'One',
 'Only',
 'Open',
 'Or',
 'Ostalis',
 'Other',
 'Otway',
 'Otways',
 'Ought',
 'Our',
 'Ours',
 'Oxford',
 'Pain',
 'Papa',
 'Pardon',
 'Park',
 'Part',
 'Partridge',
 'Pass',
 'Patroness',
 'Patty',
 'Peculiarly',
 'Pembroke',
 'People',
 'Perfect',
 'Perfectly',
 'Perhaps',
 'Perry',
 'Perrys',
 'Philippics',
 'Picture',
 'Pilfering',
 'Place',
 'Plain',
 'Playing',
 'Pleasant',
 'Pleasure',
 'Poor',
 'Poverty',
 'Pray',
 'Prejudiced',
 'Presently',
 'Pretty',
 'Prince',
 'Proof',
 'Proportions',
 'Put',
 'Quantities',
 'Quick',
 'Quite',
 'Randall',
 'Randalls',
 'Rather',
 'Read',
 'Real',
 'Receive',
 'Referring',
 'Remember',
 'Reports',
 'Resentment',
 'Respect',
 'Richard',
 'Richardson',
 'Richmond',
 'Robert',
 'Romance',
 'Rousing',
 'S',
 'Satisfied',
 'Saturday',
 'Saunders',
 'Say',
 'School',
 'Scotland',
 'Seats',
 'See',
 'Seldom',
 'Selina',
 'Sept',
 'September',
 'Serious',
 'Serle',
 'Service',
 'Seven',
 'Shakespeare',
 'Shall',
 'She',
 'Shocking',
 'Short',
 'Shortly',
 'Should',
 'Sighs',
 'Since',
 'Sir',
 'Six',
 'Sixteen',
 'Sixty',
 'Skilful',
 'Small',
 'Smallridge',
 'Smiles',
 'Smith',
 'Smiths',
 'So',
 'Soft',
 'Some',
 'Somebody',
 'Something',
 'Sometimes',
 'Son',
 'Soon',
 'Sorrow',
 'Soup',
 'South',
 'Square',
 'St',
 'Standing',
 'Stay',
 'Still',
 'Stilton',
 'Stokes',
 'Stop',
 'Success',
 'Such',
 'Suckling',
 'Sucklings',
 'Sunday',
 'Supper',
 'Suppose',
 'Supposing',
 'Surprizes',
 'Surry',
 'Swisserland',
 'Take',
 'Talking',
 'Tan',
 'Taylor',
 'Tea',
 'Tell',
 'Ten',
 'Thank',
 'That',
 'The',
 'Their',
 'Then',
 'Theodore',
 'There',
 'These',
 'They',
 'Things',
 'Think',
 'This',
 'Those',
 'Though',
 'Three',
 'Thy',
 'Till',
 'Time',
 'Tiresome',
 'Tis',
 'To',
 'Tom',
 'Too',
 'Towards',
 'Trouble',
 'True',
 'Trust',
 'Tuesday',
 'Tunbridge',
 'Tupman',
 'Tupmans',
 'Two',
 'Uncle',
 'Under',
 'Understanding',
 'Undoubtedly',
 'Ungrateful',
 'Unwelcome',
 'Upon',
 'Used',
 'V',
 'VI',
 'VII',
 'VIII',
 'VOLUME',
 'Vanity',
 'Venice',
 'Very',
 'Vicar',
 'Vicarage',
 'Vigorous',
 'Voices',
 'W',
 'WESTON',
 'WINDSOR',
 'Waiving',
 'Wakefield',
 'Walk',
 'Wallis',
 'Wallises',
 'Want',
 'Warmth',
 'Was',
 'Wax',
 'We',
 'Weather',
 'Wednesday',
 'Welch',
 'Well',
 'Were',
 'West',
 'Weston',
 'Westons',
 'Weymouth',
 'What',
 'Whatever',
 'When',
 'Whenever',
 'Where',
 'Whether',
 'Which',
 'While',
 'Who',
 'Whoever',
 'Whom',
 'Why',
 'Wickedness',
 'Will',
 'William',
 'Wiltshire',
 'Windsor',
 'Wingfield',
 'Wish',
 'With',
 'Within',
 'Without',
 'Witness',
 'Woman',
 'Women',
 'Woodhouse',
 'Woodhouses',
 'Worse',
 'Would',
 'Wrapt',
 'Wright',
 'Wrong',
 'X',
 'XI',
 'XII',
 'XIII',
 'XIV',
 'XIX',
 'XV',
 'XVI',
 'XVII',
 'XVIII',
 'Yes',
 'Yet',
 'York',
 'Yorkshire',
 'You',
 'Young',
 'Your',
 'Yours',
 '[',
 ']',
 '_',
 '_Adair_',
 '_Bath_',
 '_Chaperon_',
 '_Courtship_',
 '_Dixon_',
 '_Dixons_',
 '_Elton_',
 '_He_',
 '_Her_',
 '_His_',
 '_I_',
 '_May_',
 '_Miss_',
 '_Most_',
 '_Mr_',
 '_Mrs',
 '_Mrs_',
 '_My_',
 '_Now_',
 '_Perfection_',
 '_Philip_',
 '_Rev',
 '_Robin_',
 '_She_',
 '_Some_',
 '_Taylor_',
 '_The_',
 '_There_',
 '_We_',
 '_What_',
 '_White',
 '_Woodhouse_',
 '_You_',
 '_______',
 '_a_',
 '_accepted_',
 '_addition_',
 '_all_',
 '_almost_',
 '_alone_',
 '_amor_',
 '_and_',
 '_answer_',
 '_any_',
 '_appropriation_',
 '_as_',
 '_assistance_',
 '_at_',
 '_be_',
 '_been_',
 '_blunder_',
 '_boiled_',
 '_both_',
 '_bride_',
 '_broke_',
 '_caro_',
 '_cause_',
 '_compassion_',
 '_compliments_',
 '_court_',
 '_courtship_',
 '_did_',
 '_dissolved_',
 '_doubts_',
 '_each_',
 '_eighteen_',
 '_engagement_',
 '_evening_',
 '_felt_',
 '_first_',
 '_gentleman_',
 '_great_',
 '_greater_',
 '_had_',
 '_half_',
 '_happily_',
 '_has_',
 '_have_',
 '_he_',
 '_her_',
 '_here_',
 '_him_',
 '_his_',
 '_home_',
 '_housebreaking_',
 '_introduction_',
 '_invite_',
 '_is_',
 '_it_',
 '_joint_',
 '_just_',
 '_lady_',
 '_letting_',
 '_little_',
 '_man_',
 '_married_',
 '_marry_',
 '_me_',
 '_mediocre_',
 '_misery_',
 '_moment_',
 '_more_',
 '_must_',
 '_my_',
 '_named_',
 '_names_',
 '_nearer_',
 '_not_',
 '_now_',
 '_of_',
 '_one_',
 '_our_',
 '_own_',
 '_part_',
 '_particular_',
 '_party_',
 '_patriae_',
 '_precious_',
 '_present_',
 '_presume_',
 '_promise_',
 '_purport_',
 '_recollecting_',
 '_refused_',
 '_repentance_',
 '_respect_',
 '_sacrifice_',
 '_say_',
 '_secret_',
 '_sensation_',
 '_shall_',
 '_she_',
 '_ship_',
 '_should_',
 '_small_',
 '_some_',
 '_source_',
 '_sposo_',
 '_tell_',
 '_ten_',
 '_that_',
 '_the_',
 '_them_',
 '_then_',
 '_they_',
 '_thing_',
 '_think_',
 '_thoughts_',
 '_three_',
 '_time_',
 '_times_',
 '_to_',
 '_told_',
 '_treasures_',
 '_try_',
 '_two_',
 '_understand_',
 '_unreasonable_',
 '_unrequited_',
 '_us_',
 '_very_',
 '_wanted_',
 '_was_',
 '_way_',
 '_we_',
 '_well_',
 '_were_',
 '_when_',
 '_who_',
 '_will_',
 ...]
# Note: FreqDist is defined in the nltk.probability module.
from nltk.probability import FreqDist
# Count how many times each token occurs in the text.
fdist1=FreqDist(emma)
# Displays a dict-like mapping of token -> count; the goal is to split it
# into two parallel lists (the tokens and their counts).
fdist1
FreqDist({'lighter': 1,
          'victims': 1,
          'Square': 11,
          'playfully': 2,
          'mortified': 8,
          'fill': 2,
          'wait': 22,
          'watched': 8,
          'vessel': 1,
          'quite': 269,
          'humanity': 3,
          'running': 4,
          'ajar': 2,
          'witnessed': 4,
          'exercise': 17,
          'oppress': 1,
          'treble': 1,
          'foundation': 3,
          'conversed': 1,
          'sooner': 12,
          'candidly': 1,
          'sit': 38,
          'fresh': 14,
          'flock': 1,
          'accomplish': 2,
          'jealous': 3,
          'invalid': 4,
          'composedly': 2,
          'thing': 398,
          'improving': 1,
          'settle': 12,
          'thought': 226,
          'popularity': 4,
          'untowardly': 1,
          'luxury': 4,
          'tears': 9,
          'disgust': 5,
          'undiscerned': 1,
          'parlour': 12,
          'minutes': 53,
          'purchasing': 1,
          'conveyance': 1,
          'lost': 21,
          'strangest': 1,
          'sending': 7,
          'chained': 1,
          'palatable': 1,
          'invariable': 1,
          'regardless': 2,
          'wretched': 12,
          'painful': 5,
          'needless': 3,
          'unavoidable': 1,
          'purity': 1,
          'raised': 6,
          'ingeniously': 1,
          'dependence': 11,
          'regretted': 5,
          '&': 3,
          'remedy': 1,
          'Forcing': 1,
          'artificial': 1,
          'Let': 26,
          'orderly': 1,
          'wearying': 1,
          'Read': 2,
          'instrumental': 1,
          'denied': 4,
          'destined': 4,
          'misunderstandings': 4,
          'channel': 1,
          'sheet': 1,
          'telling': 22,
          'Brown': 1,
          'broader': 2,
          'keen': 1,
          'on': 677,
          'prominent': 3,
          'Mill': 11,
          'Augusta': 4,
          'retire': 1,
          'smallest': 20,
          'requiring': 2,
          'remote': 1,
          'Dublin': 1,
          'blow': 3,
          'somewhat': 4,
          'felling': 1,
          'amazed': 4,
          'amazingly': 2,
          'tones': 1,
          'uncivil': 1,
          'Pleasure': 1,
          'bonnet': 3,
          'bid': 4,
          'jokes': 1,
          'method': 3,
          'instruments': 1,
          'unpersuadable': 3,
          'wondered': 4,
          'spread': 7,
          'seduced': 1,
          'despatched': 1,
          'concise': 2,
          'soon': 221,
          'endeavouring': 3,
          'lamented': 2,
          'My': 108,
          'that': 1730,
          'March': 1,
          'promoting': 1,
          'stronger': 8,
          'testify': 1,
          'manner': 75,
          'hero': 2,
          'persuasions': 1,
          'listener': 1,
          'raising': 3,
          'dearly': 2,
          'reproached': 1,
          'two': 171,
          'performance': 8,
          'provide': 4,
          'was': 2385,
          'plenty': 5,
          '_wanted_': 1,
          'valuable': 9,
          'any': 651,
          'goose': 2,
          'visited': 5,
          'anxiety': 19,
          'subduing': 2,
          'scheme': 23,
          'order': 14,
          'illegitimacy': 2,
          '"\'': 1,
          'comparatively': 2,
          'Exactly': 7,
          'Anxious': 1,
          'Such': 49,
          'imagined': 20,
          'News': 1,
          'Alderneys': 1,
          'battle': 1,
          'gardens': 7,
          'dirt': 1,
          'packs': 1,
          'sister': 33,
          'preparing': 5,
          'overcome': 9,
          'humoured': 6,
          'enjoy': 9,
          'trade': 7,
          'blaming': 1,
          'overheard': 2,
          'lashes': 2,
          'condescending': 1,
          'important': 20,
          'hurried': 8,
          'imposing': 1,
          'unpleasant': 13,
          'Catherine': 1,
          'ing': 1,
          'relate': 5,
          '_He_': 2,
          'flourishing': 1,
          'Baly': 1,
          'arrived': 18,
          ':"--': 2,
          'undistinguishing': 1,
          'discrimination': 1,
          'desultory': 1,
          'mysteriously': 1,
          'summoned': 1,
          '_names_': 1,
          'spending': 5,
          'climate': 1,
          'self': 23,
          'respite': 1,
          'grandeur': 3,
          'Its': 4,
          'pressing': 6,
          'reasonable': 20,
          'eats': 2,
          'killed': 4,
          'belonging': 4,
          'Randall': 1,
          'industry': 1,
          'disperse': 1,
          'heap': 1,
          'impair': 1,
          'Humph': 4,
          'readiest': 1,
          'perfectly': 65,
          'bath': 1,
          'softening': 2,
          'turning': 24,
          'sparkling': 2,
          'Graham': 1,
          'lieu': 1,
          'contributing': 1,
          'worshipping': 1,
          'understood': 23,
          'placing': 3,
          'affairs': 5,
          'Yorkshire': 8,
          '_told_': 1,
          'gala': 1,
          'afraid': 65,
          'hear': 100,
          'description': 7,
          'year': 28,
          'uneasiness': 10,
          'scouted': 1,
          'haunting': 1,
          'price': 2,
          'excepted': 2,
          'Actually': 2,
          'darling': 1,
          'al': 1,
          'chattering': 1,
          'Lane': 6,
          'player': 2,
          'consent': 14,
          'elsewhere': 2,
          'slowly': 6,
          'fashioned': 3,
          'determination': 5,
          'residence': 4,
          'blockhead': 3,
          'modest': 3,
          'other': 220,
          'complains': 2,
          'variations': 1,
          'impending': 1,
          'Emma': 865,
          'need': 42,
          'escorted': 2,
          'violent': 2,
          'amendment': 1,
          'inspire': 1,
          'An': 12,
          'rice': 1,
          'Presently': 2,
          'gets': 4,
          'Miniatures': 1,
          'Some': 14,
          'flame': 1,
          'Abbots': 1,
          'cloth': 1,
          'gratitude': 29,
          'chapter': 1,
          'worn': 3,
          'Enscombe': 36,
          'shocked': 7,
          'gains': 1,
          'escaped': 7,
          'felicities': 2,
          'pretty': 66,
          'revealed': 2,
          'crosser': 1,
          'corroborating': 1,
          'reminding': 2,
          'exclaiming': 4,
          'circumspection': 1,
          'guinea': 1,
          'as': 1387,
          'contemplating': 1,
          'cordially': 4,
          'event': 24,
          '_your_': 5,
          'counsellor': 2,
          'oppression': 1,
          'survey': 1,
          'Wakefield': 1,
          'diet': 1,
          'forward': 36,
          'piece': 13,
          'quarter': 25,
          'Gilberts': 1,
          'mortify': 1,
          'complimenter': 1,
          'rousing': 2,
          'wheres': 1,
          'petticoat': 2,
          '_small_': 1,
          'desperate': 3,
          'engaging': 7,
          'sense': 56,
          'decease': 2,
          'notice': 27,
          'adversary': 1,
          'privileged': 2,
          'little': 354,
          'late': 26,
          'Smiths': 1,
          '_home_': 2,
          'slackened': 1,
          'unprepared': 2,
          'breath': 3,
          'fits': 1,
          'business': 54,
          'mimic': 1,
          'illnesses': 3,
          'formal': 4,
          'Stop': 2,
          'distinguished': 3,
          'poignant': 2,
          'expressed': 10,
          'procuring': 5,
          'precedes': 1,
          'butcher': 2,
          'Under': 2,
          'sick': 10,
          'ware': 1,
          'l': 2,
          'alphabets': 2,
          ':"': 2,
          'charges': 1,
          'fondly': 4,
          'pencilled': 1,
          'occurs': 1,
          'periods': 1,
          'whoever': 2,
          'thorough': 12,
          'labour': 4,
          'borrow': 1,
          'lengths': 2,
          'resources': 10,
          'known': 60,
          'tea': 24,
          'syllable': 10,
          'waverings': 1,
          'explained': 3,
          'needed': 2,
          'quarto': 1,
          'attractive': 2,
          'obstinate': 1,
          '_and_': 1,
          'gallant': 11,
          'February': 7,
          'sees': 3,
          'mock': 1,
          'calmly': 3,
          'suffering': 16,
          'begs': 1,
          'auspices': 1,
          'solemnly': 1,
          'engrosses': 1,
          'mode': 1,
          'vanity': 13,
          'pales': 1,
          'unhealthy': 2,
          'emotion': 3,
          'surmises': 1,
          'militia': 2,
          'undesirable': 1,
          'first': 209,
          'talks': 4,
          'possible': 84,
          'opportunities': 2,
          '_both_': 1,
          '!': 549,
          'associates': 2,
          'missing': 3,
          'unseen': 3,
          'came': 119,
          'close': 18,
          'since': 63,
          'Coles': 17,
          'leniently': 1,
          'warmly': 17,
          'improvidently': 1,
          'hoped': 43,
          'misinterpreted': 2,
          'drawing': 30,
          'intends': 1,
          'canvassing': 1,
          'Understanding': 2,
          'unfeelingly': 1,
          'sent': 33,
          'pools': 1,
          'Bristol': 8,
          'line': 16,
          'stop': 13,
          'disappoint': 3,
          'sunk': 10,
          'alteration': 2,
          '8th': 1,
          'cheeks': 4,
          'defined': 1,
          'ascending': 1,
          'living': 17,
          'clinging': 1,
          'tranquil': 4,
          'Robert': 32,
          'whichever': 1,
          'depended': 8,
          'predict': 2,
          'youngest': 4,
          'comprised': 1,
          'visit': 86,
          'whole': 76,
          'fairy': 3,
          'certainty': 10,
          'communication': 9,
          'surely': 3,
          'privileges': 1,
          'shoulders': 3,
          'End': 8,
          'whispers': 1,
          'softer': 3,
          'prime': 1,
          'occupation': 4,
          'Forest': 2,
          'Hartfield': 160,
          'gifted': 1,
          'Somebody': 5,
          '_would_': 1,
          'bleak': 1,
          'unmirthful': 1,
          'agreeably': 5,
          'portion': 3,
          'camp': 1,
          'timidity': 1,
          'opposition': 2,
          'insensibility': 2,
          'unemployed': 1,
          'successively': 1,
          'creditably': 1,
          'filled': 2,
          'malt': 1,
          'bare': 2,
          'fireside': 4,
          '?\'"': 2,
          'evinced': 1,
          'raptures': 5,
          'delegating': 1,
          'service': 22,
          'panic': 2,
          'discoveries': 4,
          'food': 3,
          'analogy': 1,
          'comprehend': 15,
          '_bride_': 1,
          'who': 281,
          'glimpse': 4,
          'faith': 5,
          'hastening': 1,
          'disposition': 24,
          'concealment': 11,
          'VOLUME': 3,
          'expressions': 8,
          'shoes': 4,
          'sashed': 2,
          'rush': 2,
          'enduring': 1,
          'idea': 100,
          'portfolio': 1,
          'limb': 1,
          'illiterate': 2,
          'Soon': 3,
          'appreciating': 1,
          'Wrapt': 1,
          'drown': 1,
          'estate': 4,
          'reassembled': 1,
          'apartment': 2,
          'possess': 2,
          '_each_': 1,
          'side': 71,
          'else': 80,
          'dissipate': 1,
          'woollen': 1,
          'Weather': 1,
          'readily': 7,
          'folding': 1,
          'thoughts': 38,
          'language': 9,
          'deplore': 2,
          'coldest': 1,
          'Granted': 1,
          'separations': 1,
          'infection': 3,
          'Easter': 2,
          'Son': 1,
          'changed': 6,
          'uncouthness': 1,
          'appendages': 1,
          'teachers': 3,
          'disapprobation': 1,
          '_we_': 6,
          'Anne': 3,
          'rightly': 4,
          'Alas': 3,
          'engrossing': 1,
          'restrained': 2,
          'concert': 1,
          'main': 2,
          'pointing': 1,
          'matrimony': 7,
          'benevolent': 2,
          'wrist': 1,
          'eldest': 8,
          'governed': 2,
          'unfelt': 1,
          'desirable': 18,
          'extricated': 1,
          'praising': 1,
          'experienced': 1,
          'stain': 2,
          'Absence': 1,
          'streets': 1,
          'anticipation': 4,
          'directly': 51,
          'haberdasher': 1,
          'pays': 4,
          'accord': 1,
          'more': 464,
          'discordancies': 1,
          'transfer': 2,
          'spoiled': 4,
          'presumption': 7,
          'Master': 1,
          'Uncle': 2,
          'trophies': 1,
          'fetch': 6,
          'aye': 2,
          'want': 89,
          'undesigned': 1,
          'basis': 2,
          'ringing': 1,
          'dignities': 1,
          'confers': 1,
          'retort': 1,
          'hungry': 3,
          'entitled': 3,
          'however': 114,
          'insane': 1,
          'forms': 1,
          'escape': 14,
          'politeness': 7,
          'second': 31,
          'throat': 12,
          'grandpapas': 1,
          'lonely': 1,
          'spurn': 1,
          'coachman': 5,
          'unexceptionably': 1,
          'played': 8,
          'discover': 4,
          'honestly': 3,
          'discretion': 4,
          'quarrelsome': 1,
          'cautiousness': 1,
          'courageous': 1,
          'Immediately': 1,
          'dissuade': 3,
          'succeeded': 11,
          'combine': 1,
          'Certainly': 11,
          '_unrequited_': 1,
          'avail': 2,
          'bailiff': 1,
          'finger': 3,
          'constitution': 7,
          'madness': 4,
          'capable': 6,
          'chusing': 2,
          'comforts': 8,
          'downstairs': 4,
          'sullenness': 1,
          'discussed': 1,
          'black': 5,
          'temporary': 2,
          'gentlewoman': 2,
          'represent': 1,
          'brunt': 1,
          'strikingly': 1,
          'watering': 3,
          'shut': 9,
          'argumentative': 1,
          'stretch': 2,
          'instigator': 1,
          'feet': 1,
          'dumplings': 1,
          'composure': 7,
          'league': 1,
          'knowledge': 26,
          'effectually': 3,
          'rapturous': 1,
          'room': 117,
          'prodigy': 2,
          'achievement': 1,
          'support': 8,
          'plainly': 6,
          'buyings': 1,
          'fence': 1,
          'varying': 1,
          'testifying': 1,
          ').': 4,
          'unsuspected': 2,
          'parsnip': 1,
          'breeding': 2,
          'cheerfulness': 6,
          'down': 70,
          'amounted': 1,
          'marked': 3,
          'smooth': 7,
          'sharer': 1,
          'than': 415,
          'glorious': 1,
          'unnecessary': 8,
          'deal': 92,
          'transition': 3,
          'eyeing': 2,
          'rest': 50,
          'fathomed': 1,
          'baby': 6,
          'summer': 23,
          'attends': 1,
          'chuses': 4,
          'critical': 2,
          'doom': 1,
          'precisely': 7,
          'calculated': 3,
          'shewed': 17,
          'dwelt': 4,
          'expected': 40,
          '_My_': 2,
          'Offices': 1,
          'Surry': 9,
          'whispered': 3,
          'arms': 2,
          'supposes': 1,
          'opens': 1,
          'journey': 12,
          'supposed': 35,
          'cheap': 2,
          'drain': 1,
          'ball': 31,
          'grant': 3,
          'happening': 4,
          'inroads': 1,
          'popular': 1,
          'unconcerned': 2,
          'pursued': 1,
          'Beg': 1,
          'behind': 19,
          'push': 1,
          'wonderful': 13,
          'enters': 1,
          'formation': 1,
          'astonish': 1,
          'deference': 2,
          'detached': 1,
          'deathbed': 1,
          'moderate': 7,
          'repast': 1,
          'drawn': 15,
          'involuntary': 1,
          '!--"': 1,
          'Absurd': 1,
          'eat': 12,
          'readier': 1,
          'productive': 1,
          'personally': 1,
          'fortitude': 4,
          'sympathise': 1,
          'improve': 7,
          'pleases': 3,
          'rise': 5,
          'pork': 11,
          'pursue': 1,
          'languor': 2,
          'travels': 1,
          'Pray': 15,
          'Children': 1,
          'confessing': 4,
          'sets': 1,
          'daringly': 1,
          'trick': 5,
          'notions': 6,
          'seriously': 16,
          'dropt': 4,
          'containing': 5,
          'lessened': 2,
          'exaggeration': 1,
          'couplet': 1,
          'solace': 2,
          'Tuesday': 7,
          'Till': 7,
          'capricious': 1,
          'gracious': 3,
          '_two_': 1,
          'timed': 1,
          'disagree': 4,
          'Pass': 2,
          'bursts': 1,
          'multiplied': 2,
          'pretended': 1,
          'reluctant': 1,
          'intreat': 1,
          'returned': 39,
          'pre': 1,
          'insufferable': 6,
          'boy': 13,
          'carried': 5,
          'papas': 1,
          'comforted': 3,
          'prosperity': 3,
          'state': 56,
          'intelligence': 13,
          'gaiters': 1,
          'Ours': 1,
          'underrated': 2,
          'mark': 3,
          'veils': 1,
          'renewal': 3,
          'carelessness': 1,
          'former': 13,
          'mixture': 8,
          'pert': 3,
          'inevitably': 2,
          "!'": 6,
          '_mediocre_': 1,
          'asserted': 2,
          'within': 29,
          'how': 263,
          'indistinct': 1,
          'would': 815,
          'having': 145,
          'Ireland': 14,
          'washing': 1,
          'resulting': 1,
          'obligation': 5,
          'sacks': 1,
          'supply': 9,
          'Bates': 148,
          'turnips': 1,
          'considerable': 17,
          'thereabouts': 1,
          'call': 41,
          'rencontre': 3,
          'quickness': 9,
          'interest': 47,
          'hindrance': 1,
          'yield': 4,
          'fold': 1,
          'made': 199,
          'tone': 24,
          'increase': 12,
          'sleek': 1,
          'attorney': 1,
          'unfit': 4,
          'apples': 15,
          'privation': 1,
          'pressingly': 1,
          'for': 1321,
          'evening': 96,
          'from': 535,
          'served': 1,
          'pop': 1,
          'repugnance': 1,
          'intimates': 2,
          'Knightley': 389,
          'peculiar': 7,
          'talking': 52,
          'apprehensively': 1,
          'heated': 3,
          'Assured': 1,
          'draw': 10,
          'bride': 13,
          'satisfactions': 1,
          'recovered': 12,
          'required': 14,
          'spot': 7,
          'Campbells': 26,
          'welcome': 16,
          'pack': 1,
          'beet': 1,
          'Astley': 4,
          'pitifullest': 1,
          'observing': 8,
          'confession': 4,
          'strong': 44,
          'agriculture': 1,
          'give': 157,
          'Christian': 2,
          'splendour': 2,
          'distinct': 10,
          'disgusting': 2,
          'neighbourhood': 15,
          'doer': 1,
          'packet': 1,
          'Compliments': 1,
          'differing': 1,
          'consciously': 1,
          'note': 23,
          'come': 159,
          'nervously': 1,
          'manage': 5,
          'plain': 22,
          'weight': 4,
          'notes': 1,
          'sale': 2,
          'absent': 10,
          'dreams': 1,
          'announce': 7,
          'curled': 1,
          'How': 108,
          'Has': 7,
          'bewildered': 3,
          'acting': 8,
          'emulate': 1,
          'childhood': 2,
          'gave': 54,
          'recurrence': 2,
          '_Rev': 1,
          'neglect': 6,
          'accent': 5,
          'cabbage': 1,
          '_well_': 2,
          'great': 263,
          'heal': 2,
          'inclination': 21,
          'token': 1,
          'departed': 1,
          'hesitating': 4,
          'edition': 1,
          'buy': 1,
          'consult': 5,
          'consideration': 24,
          'compliments': 14,
          'fondness': 2,
          'merest': 1,
          'unkind': 1,
          'stoop': 2,
          'broad': 7,
          'Used': 1,
          'lending': 1,
          'pantry': 1,
          'attained': 1,
          'services': 1,
          'Happy': 3,
          'disagreement': 3,
          'sincere': 5,
          'guessing': 5,
          'confinement': 2,
          'introductions': 1,
          'mercy': 3,
          'plentiful': 1,
          'conceived': 3,
          'ostentation': 1,
          'tete': 8,
          'foresaw': 3,
          'listen': 18,
          'contrasted': 1,
          'anywhere': 15,
          'hurrying': 9,
          'throughout': 4,
          'remain': 14,
          'kept': 22,
          'vouchsafed': 1,
          'demerits': 1,
          'ceremony': 7,
          'nor': 63,
          'kinder': 5,
          'deduction': 2,
          'Two': 7,
          'dependent': 3,
          'free': 5,
          'yellow': 3,
          'age': 22,
          'atonement': 2,
          'regrets': 6,
          'salted': 2,
          'recollection': 8,
          'along': 4,
          'entreated': 4,
          'partial': 7,
          'travelling': 5,
          'passes': 3,
          'Donwell': 49,
          'farmer': 8,
          'delightfully': 8,
          'spectacles': 9,
          'amidst': 1,
          'sketches': 2,
          '_very_': 10,
          'despoiling': 1,
          'mutual': 5,
          'man': 233,
          'hesitatingly': 3,
          ";'--": 1,
          'kindness': 40,
          'compassionate': 4,
          'office': 16,
          'unbecoming': 1,
          'celibacy': 1,
          'work': 22,
          'themselves': 40,
          '_gentleman_': 1,
          'inevitable': 4,
          'begging': 1,
          'separation': 4,
          'narrower': 1,
          'dryly': 2,
          'disinclination': 2,
          'lives': 7,
          '_repentance_': 1,
          'studied': 2,
          'objected': 2,
          'remonstrance': 2,
          'denial': 4,
          'Woman': 2,
          'patiently': 1,
          'fresco': 1,
          'names': 3,
          'accommodations': 2,
          'proportion': 4,
          'recollected': 3,
          'restoring': 1,
          'characters': 2,
          'Fairfax': 241,
          'amiable': 34,
          'saving': 2,
          'failures': 1,
          'permitted': 4,
          'admirable': 3,
          'selfish': 5,
          'Standing': 1,
          '_wish_': 1,
          'report': 10,
          'quick': 22,
          'lists': 2,
          'mama': 3,
          'chatter': 1,
          'pangs': 1,
          'roast': 4,
          'discipline': 2,
          'cooler': 4,
          'anticipated': 6,
          'Wiltshire': 1,
          'henceforth': 1,
          'broken': 3,
          'civilities': 9,
          'damp': 6,
          'fire': 16,
          'views': 17,
          'conversation': 42,
          'governess': 9,
          ".'": 23,
          'alleviations': 2,
          'XVIII': 3,
          'parent': 2,
          '_great_': 1,
          'moral': 1,
          'decision': 8,
          'accustomed': 2,
          'ignorance': 4,
          'pride': 18,
          'decisive': 5,
          'disposed': 21,
          'thanked': 7,
          'led': 25,
          'Hazle': 1,
          'river': 3,
          'protesting': 1,
          'assurance': 8,
          'sort': 112,
          'freedom': 3,
          'prosing': 1,
          'tires': 1,
          'Supper': 1,
          'either': 61,
          'vexation': 6,
          'horrible': 5,
          'tremble': 2,
          'saddle': 1,
          'ages': 2,
          'calm': 5,
          'unwillingness': 4,
          'objects': 7,
          'Dinner': 2,
          'butler': 2,
          'traffic': 1,
          'Those': 8,
          'attitude': 3,
          'universally': 4,
          'sleety': 1,
          'fancying': 12,
          'Other': 1,
          'influenced': 2,
          'remains': 5,
          'Hetty': 2,
          'Mitchell': 1,
          'warmth': 11,
          'too': 253,
          'Sixty': 1,
          ...})
# fdist1 behaves like a dict (word -> count); split it into two parallel lists.
# NOTE(review): `lebal` looks like a typo for `label`; the name is kept as-is
# so that any later cell referring to it keeps working.
lebal = list(fdist1)                         # the words, in the table's iteration order
quant = [fdist1[token] for token in lebal]   # count of each word, same order as lebal
quant
sorted(quant, reverse=True)
[11454,
 6928,
 5183,
 4844,
 4672,
 4279,
 3178,
 3004,
 2385,
 2381,
 2199,
 2128,
 2118,
 2101,
 2004,
 1970,
 1778,
 1730,
 1677,
 1606,
 1387,
 1382,
 1365,
 1321,
 1301,
 1220,
 1187,
 1153,
 1151,
 1148,
 1138,
 1088,
 1007,
 997,
 933,
 924,
 865,
 835,
 825,
 815,
 759,
 758,
 699,
 685,
 677,
 651,
 619,
 616,
 592,
 591,
 580,
 574,
 564,
 564,
 562,
 559,
 558,
 552,
 549,
 535,
 506,
 490,
 484,
 478,
 464,
 452,
 447,
 441,
 440,
 439,
 434,
 432,
 422,
 421,
 420,
 418,
 415,
 413,
 400,
 398,
 398,
 392,
 389,
 385,
 380,
 375,
 366,
 357,
 356,
 354,
 347,
 340,
 338,
 337,
 337,
 335,
 327,
 322,
 315,
 313,
 312,
 308,
 303,
 301,
 301,
 297,
 293,
 281,
 279,
 273,
 272,
 270,
 269,
 263,
 263,
 260,
 253,
 248,
 246,
 243,
 243,
 243,
 241,
 237,
 235,
 233,
 230,
 226,
 224,
 223,
 221,
 220,
 220,
 219,
 217,
 213,
 212,
 212,
 211,
 209,
 208,
 207,
 204,
 200,
 199,
 199,
 193,
 190,
 190,
 190,
 189,
 185,
 181,
 177,
 174,
 171,
 169,
 166,
 163,
 160,
 159,
 159,
 157,
 155,
 153,
 152,
 150,
 148,
 148,
 146,
 145,
 145,
 145,
 144,
 144,
 143,
 142,
 141,
 140,
 139,
 138,
 138,
 134,
 133,
 133,
 133,
 133,
 130,
 129,
 129,
 129,
 129,
 128,
 126,
 125,
 125,
 125,
 124,
 122,
 122,
 120,
 120,
 119,
 119,
 118,
 118,
 117,
 117,
 116,
 116,
 116,
 114,
 114,
 113,
 113,
 112,
 110,
 110,
 109,
 109,
 108,
 108,
 108,
 108,
 106,
 106,
 106,
 106,
 105,
 102,
 102,
 102,
 102,
 101,
 100,
 100,
 99,
 99,
 98,
 97,
 97,
 96,
 96,
 95,
 95,
 95,
 94,
 94,
 93,
 92,
 92,
 92,
 92,
 91,
 90,
 90,
 90,
 90,
 89,
 89,
 89,
 89,
 89,
 89,
 88,
 88,
 88,
 88,
 87,
 87,
 86,
 86,
 85,
 85,
 85,
 85,
 85,
 84,
 83,
 83,
 82,
 82,
 82,
 81,
 81,
 81,
 81,
 80,
 80,
 80,
 80,
 79,
 79,
 78,
 77,
 77,
 76,
 76,
 76,
 76,
 75,
 75,
 74,
 73,
 73,
 72,
 72,
 72,
 72,
 71,
 71,
 71,
 71,
 71,
 71,
 70,
 70,
 70,
 69,
 69,
 69,
 69,
 68,
 68,
 68,
 68,
 68,
 67,
 67,
 67,
 67,
 66,
 66,
 66,
 66,
 66,
 65,
 65,
 65,
 65,
 65,
 64,
 64,
 64,
 64,
 64,
 64,
 63,
 63,
 63,
 63,
 63,
 63,
 63,
 63,
 62,
 62,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 61,
 60,
 60,
 60,
 60,
 60,
 60,
 60,
 59,
 59,
 59,
 59,
 59,
 59,
 59,
 59,
 59,
 58,
 58,
 57,
 57,
 57,
 56,
 56,
 56,
 56,
 56,
 56,
 56,
 56,
 56,
 55,
 55,
 55,
 55,
 54,
 54,
 54,
 54,
 54,
 54,
 54,
 53,
 53,
 53,
 53,
 52,
 52,
 52,
 52,
 51,
 51,
 51,
 51,
 51,
 51,
 51,
 51,
 51,
 51,
 51,
 51,
 51,
 51,
 51,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 49,
 49,
 49,
 49,
 49,
 49,
 49,
 48,
 48,
 48,
 48,
 48,
 48,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 46,
 46,
 46,
 46,
 46,
 46,
 46,
 46,
 46,
 46,
 46,
 45,
 45,
 45,
 45,
 45,
 45,
 44,
 44,
 44,
 44,
 44,
 44,
 44,
 43,
 43,
 43,
 43,
 43,
 43,
 42,
 42,
 42,
 42,
 42,
 42,
 41,
 41,
 41,
 41,
 41,
 41,
 41,
 41,
 41,
 40,
 40,
 40,
 40,
 40,
 40,
 40,
 40,
 40,
 40,
 40,
 40,
 40,
 39,
 39,
 39,
 39,
 39,
 39,
 39,
 39,
 39,
 39,
 39,
 39,
 39,
 39,
 39,
 39,
 39,
 38,
 38,
 38,
 38,
 38,
 38,
 38,
 38,
 38,
 38,
 38,
 38,
 38,
 38,
 38,
 37,
 37,
 37,
 37,
 37,
 37,
 37,
 37,
 37,
 37,
 37,
 36,
 36,
 36,
 36,
 36,
 36,
 36,
 36,
 36,
 36,
 36,
 36,
 36,
 35,
 35,
 35,
 35,
 35,
 35,
 35,
 34,
 34,
 34,
 34,
 34,
 34,
 34,
 34,
 34,
 34,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 32,
 32,
 32,
 32,
 32,
 32,
 32,
 32,
 32,
 32,
 32,
 32,
 32,
 32,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 31,
 30,
 30,
 30,
 30,
 30,
 30,
 30,
 30,
 30,
 30,
 30,
 30,
 30,
 30,
 30,
 30,
 30,
 30,
 29,
 29,
 29,
 29,
 29,
 29,
 29,
 29,
 29,
 29,
 29,
 29,
 29,
 29,
 29,
 29,
 29,
 28,
 28,
 28,
 28,
 28,
 28,
 28,
 28,
 28,
 28,
 28,
 28,
 28,
 28,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 26,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 25,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 24,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 23,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 22,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 21,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 18,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 17,
 ...]

畫圖

import matplotlib as mpl
import matplotlib.pyplot as plt
import pylab as pl
%pylab inline
Populating the interactive namespace from numpy and matplotlib
# Convert the list of counts to an ndarray before handing it to pl.hist
# (`np` is placed in the namespace by the `%pylab inline` magic above).
quant = np.array(quant)
pl.figure(figsize=(8, 8))
# Histogram of per-word counts, split into 100 equal-width bins.
pl.hist(quant, bins=100)
# show must be *called*: the bare name `pl.show` only evaluates to the function
# object (echoed as `<function matplotlib.pyplot.show>`) and never invokes it.
pl.show()

png

pl.hist?

出現這樣的圖是因爲詞頻的極差非常大:最高頻的詞出現了11454次,而絕大多數詞出現不到20次,所以把範圍等分成100個區間後,幾乎所有的詞都落在最左邊的一格。改畫累積頻率分佈更能說明問題。另外,matplotlib值得認真學習。

# Open a wide 15x8 figure, then draw the cumulative counts of the
# 50 most frequent tokens (FreqDist.plot stops after that many samples).
pl.figure(figsize=(15, 8))
fdist1.plot(50, cumulative=True)

這裏寫圖片描述

前50個最常見的詞(含標點符號)合計約出現96000次,就佔了全書192427個詞符的大約一半!

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章