【畫圖代碼】matplotlib - 詞向量或類向量散點圖

思路:

  1. 詞向量:
    1. 將詞向量中值最大的維度的下標作爲該詞向量的標籤。
    2. 採用t-SNE將詞向量壓縮到2維空間,然後畫成散點圖。點的顏色就是該詞向量的標籤。
  2. 類向量
    1. 類向量指的是在分類任務中,樣本在輸入softmax之前的向量。
    2. 直接採用t-SNE將類向量壓縮到2維空間,然後畫出散點圖。點的顏色就是該類向量的標籤。

例子:

from matplotlib.backends.backend_pdf import PdfPages
from sklearn.manifold import TSNE
from matplotlib import pyplot as plt
import numpy as np

# Example data: 50 word vectors, each with 15 dimensions (array shape (50, 15)).
# The call opens on the assignment line, so no backslash continuation is
# needed (a backslash followed by trailing whitespace is a SyntaxError).
word_embedding = np.array([[ -9.26847000e-02,   2.65970000e-01,   3.29432000e-01,
         -7.39718000e-02,  -6.73928000e-02,  -6.35561000e-02,
         -9.60802000e-02,   3.04700000e-01,  -6.34060000e-02,
         -8.69368000e-02,   3.15142000e-01,   3.25048000e-01,
          3.43239000e-01,   3.20784000e-01,   3.10090000e-01],
       [ -4.08858000e-03,   3.86324000e-01,   4.39724000e-01,
          1.41515000e-02,   2.05663000e-02,   3.34793000e-02,
          2.29018000e-03,   4.27724000e-01,   2.35102000e-02,
          5.34813000e-02,   4.69541000e-01,   4.56828000e-01,
          4.67765000e-01,   4.43996000e-01,   4.19499000e-01],
       [ -5.20866000e-02,   2.24091000e-01,   2.78965000e-01,
         -6.02106000e-02,  -4.93041000e-02,  -4.81996000e-02,
         -5.83473000e-02,   2.58325000e-01,  -4.49707000e-02,
         -5.12397000e-02,   2.65172000e-01,   2.76636000e-01,
          2.64820000e-01,   2.66801000e-01,   2.78434000e-01],
       [  3.01420000e-01,   4.53456000e-01,   5.62255000e-01,
          3.11544000e-01,   3.51967000e-01,   3.50391000e-01,
          3.35528000e-01,   5.39958000e-01,   2.87258000e-01,
          3.27332000e-01,   5.35088000e-01,   5.41420000e-01,
          5.60140000e-01,   5.29896000e-01,   5.53655000e-01],
       [ -3.34516000e-02,   2.78522000e-01,   3.37687000e-01,
         -4.85574000e-02,  -3.37070000e-02,  -2.66458000e-02,
         -2.92930000e-02,   3.31487000e-01,  -1.53911000e-02,
         -5.19083000e-02,   3.33022000e-01,   3.41722000e-01,
          3.27839000e-01,   3.27849000e-01,   3.28789000e-01],
       [  4.43914000e-03,   2.36622000e-01,   2.76034000e-01,
         -1.55010000e-04,   5.38064000e-03,  -5.85116000e-03,
          2.28113000e-03,   2.78247000e-01,   2.74197000e-03,
         -6.24126000e-03,   2.78610000e-01,   2.80766000e-01,
          2.66917000e-01,   2.85312000e-01,   2.73923000e-01],
       [  9.90938000e-02,   1.45888000e-01,   2.29572000e-01,
          9.97999000e-02,   9.14779000e-02,   8.49883000e-02,
          7.79318000e-02,   2.25150000e-01,   1.02579000e-01,
          1.28821000e-01,   2.07060000e-01,   2.13385000e-01,
          1.99332000e-01,   2.20373000e-01,   2.08831000e-01],
       [ -1.86492000e-02,   2.84924000e-01,   3.22759000e-01,
         -2.14725000e-02,  -2.82217000e-02,  -1.93443000e-02,
         -2.08581000e-02,   3.39969000e-01,  -1.27362000e-02,
         -1.59953000e-02,   3.39407000e-01,   3.26889000e-01,
          3.26662000e-01,   3.49180000e-01,   3.28859000e-01],
       [  1.83003000e-01,   4.95721000e-02,   6.46707000e-02,
          1.68665000e-01,   1.95583000e-01,   1.93675000e-01,
          1.67311000e-01,   5.65916000e-02,   1.59715000e-01,
          1.80062000e-01,   6.36378000e-02,   6.98871000e-02,
          5.53104000e-02,   8.59143000e-02,   8.49541000e-02],
       [  3.57093000e-01,  -3.67925000e-02,  -4.25365000e-02,
          3.55424000e-01,   3.47844000e-01,   3.48721000e-01,
          3.45803000e-01,  -2.11651000e-02,   2.84238000e-01,
          3.36385000e-01,  -4.02782000e-02,  -6.11450000e-02,
         -2.28289000e-02,  -2.58880000e-02,  -4.36964000e-02],
       [  1.07747000e-01,   2.40536000e-01,   3.39735000e-01,
          7.96501000e-02,   8.23634000e-02,   4.08718000e-02,
          7.97752000e-02,   3.42705000e-01,   2.58616000e-02,
          4.64636000e-02,   3.28200000e-01,   3.11190000e-01,
          3.50902000e-01,   3.44092000e-01,   2.88163000e-01],
       [  1.96865000e-01,   1.54481000e-01,   1.73556000e-01,
          2.03293000e-01,   2.04454000e-01,   2.27668000e-01,
          2.29026000e-01,   1.77935000e-01,   1.91509000e-01,
          2.39608000e-01,   2.07256000e-01,   1.88885000e-01,
          1.90958000e-01,   1.98382000e-01,   1.93223000e-01],
       [  2.75430000e-02,   4.02585000e-01,   5.12363000e-01,
          1.66888000e-02,   4.71756000e-02,   3.62513000e-02,
          2.24278000e-02,   5.16299000e-01,   2.56757000e-02,
          5.01242000e-02,   4.99019000e-01,   5.19048000e-01,
          5.08627000e-01,   4.98673000e-01,   5.32897000e-01],
       [  1.09952000e-01,   3.68568000e-01,   4.66063000e-01,
          8.67051000e-02,   1.35084000e-01,   1.05813000e-01,
          1.52189000e-01,   4.54485000e-01,   8.05630000e-02,
          1.16354000e-01,   4.81074000e-01,   4.34318000e-01,
          4.68502000e-01,   4.56193000e-01,   4.51878000e-01],
       [  9.07705000e-01,  -1.19126000e-01,  -1.94586000e-01,
          9.69742000e-01,   1.05544000e+00,   1.00079000e+00,
          1.07812000e+00,  -1.24645000e-01,   7.67645000e-01,
          9.83039000e-01,  -1.45237000e-01,  -1.23262000e-01,
         -1.80877000e-01,  -1.68868000e-01,  -1.31907000e-01],
       [  5.47304000e-02,   3.41735000e-01,   3.97924000e-01,
          4.87275000e-02,   6.64988000e-02,   5.46800000e-02,
          5.80973000e-02,   3.95630000e-01,   4.80677000e-02,
          4.67798000e-02,   4.04524000e-01,   4.08717000e-01,
          3.85432000e-01,   3.94440000e-01,   4.01805000e-01],
       [  1.87155000e-01,   3.79954000e-01,   4.83464000e-01,
          2.13328000e-01,   2.56143000e-01,   1.99511000e-01,
          2.31228000e-01,   4.62363000e-01,   1.79116000e-01,
          1.93377000e-01,   4.76625000e-01,   4.80363000e-01,
          4.68738000e-01,   4.96291000e-01,   4.62237000e-01],
       [  1.10135000e+00,   2.21877000e-01,   1.43174000e-01,
          1.19311000e+00,   1.16273000e+00,   1.19129000e+00,
          1.20193000e+00,   2.46340000e-01,   9.00804000e-01,
          1.21326000e+00,   2.55334000e-01,   2.85639000e-01,
          1.75352000e-01,   2.92858000e-01,   2.13424000e-01],
       [  1.24295000e-02,   2.05530000e-01,   2.35478000e-01,
          1.06360000e-02,   1.25000000e-02,   1.39434000e-02,
          6.19534000e-03,   2.32923000e-01,   2.25262000e-02,
          1.63384000e-02,   2.49811000e-01,   2.45686000e-01,
          2.39310000e-01,   2.44092000e-01,   2.49588000e-01],
       [  4.90046000e-02,   1.98349000e-01,   2.44335000e-01,
          2.97860000e-02,   6.89736000e-02,   4.80883000e-02,
          2.78426000e-02,   2.54369000e-01,   2.66232000e-02,
          2.74745000e-02,   1.81589000e-01,   2.40744000e-01,
          2.53391000e-01,   2.07375000e-01,   1.91917000e-01],
       [  4.16505000e-01,  -7.59996000e-02,  -1.09354000e-01,
          4.26273000e-01,   4.31533000e-01,   4.29690000e-01,
          4.25322000e-01,  -1.01641000e-01,   3.21525000e-01,
          4.01477000e-01,  -9.70148000e-02,  -1.15661000e-01,
         -1.03311000e-01,  -9.44064000e-02,  -1.10988000e-01],
       [  1.64333000e-01,   1.08984000e-01,   1.92483000e-01,
          1.67026000e-01,   1.98486000e-01,   1.30462000e-01,
          1.68726000e-01,   1.62928000e-01,   1.57523000e-01,
          1.97097000e-01,   1.78548000e-01,   1.51745000e-01,
          1.84647000e-01,   1.49899000e-01,   1.59358000e-01],
       [ -2.80124000e-01,   4.47964000e-01,   4.65301000e-01,
         -3.12828000e-01,  -3.04826000e-01,  -3.28779000e-01,
         -3.35304000e-01,   4.47377000e-01,  -2.65617000e-01,
         -2.91028000e-01,   4.60513000e-01,   4.65122000e-01,
          4.68598000e-01,   4.98790000e-01,   5.08062000e-01],
       [ -2.86070000e-02,   3.33408000e-01,   3.75448000e-01,
         -2.74869000e-02,  -2.43182000e-02,  -3.28590000e-02,
         -5.06823000e-02,   3.88905000e-01,  -1.90741000e-02,
         -2.09121000e-02,   4.09026000e-01,   3.76590000e-01,
          4.20844000e-01,   4.02346000e-01,   4.01546000e-01],
       [ -5.26256000e-02,   2.24553000e-01,   2.72307000e-01,
         -6.59182000e-02,  -5.10974000e-02,  -5.03741000e-02,
         -5.21351000e-02,   2.65349000e-01,  -3.83339000e-02,
         -6.08046000e-02,   2.80434000e-01,   2.74862000e-01,
          2.72504000e-01,   2.74584000e-01,   2.68554000e-01],
       [  1.74568000e-01,   1.74784000e-01,   2.43011000e-01,
          1.63657000e-01,   1.68196000e-01,   2.27396000e-01,
          1.61371000e-01,   2.33325000e-01,   1.29902000e-01,
          1.61663000e-01,   2.53071000e-01,   2.37139000e-01,
          2.63988000e-01,   2.24518000e-01,   2.45118000e-01],
       [  1.16255000e-02,   2.70476000e-01,   3.56054000e-01,
          4.01313000e-02,   5.06687000e-02,   1.23457000e-02,
          1.54674000e-02,   3.49423000e-01,   1.75948000e-02,
          3.63213000e-02,   3.41615000e-01,   3.45635000e-01,
          3.60263000e-01,   3.62238000e-01,   3.64441000e-01],
       [ -1.73951000e-02,   1.13664000e-01,   1.11284000e-01,
          6.49500000e-02,   7.21068000e-02,   1.08015000e-01,
          8.74592000e-02,   3.03161000e-01,   2.90478000e-02,
          5.41663000e-02,   3.15965000e-01,   2.93228000e-01,
          7.86849000e-02,   2.84163000e-01,   2.84195000e-01],
       [  2.54997000e-01,  -1.63147000e-03,  -1.65853000e-03,
          2.66901000e-01,   2.66949000e-01,   2.63034000e-01,
          2.64883000e-01,  -1.57527000e-02,   2.29119000e-01,
          2.58299000e-01,  -8.23761000e-03,  -7.07062000e-03,
         -2.50627000e-02,  -1.55307000e-02,  -1.21995000e-02],
       [  1.01620000e+00,   2.76538000e-01,   3.51427000e-01,
          1.09238000e+00,   1.09250000e+00,   1.05259000e+00,
          1.05301000e+00,   3.97067000e-01,   8.68429000e-01,
          1.07596000e+00,   3.62113000e-01,   3.74292000e-01,
          3.19586000e-01,   4.03213000e-01,   3.68771000e-01],
       [ -1.63824000e-01,   4.41900000e-01,   5.36594000e-01,
         -1.86510000e-01,  -1.65381000e-01,  -2.02298000e-01,
         -1.81492000e-01,   5.48222000e-01,  -1.36803000e-01,
         -1.74517000e-01,   5.60032000e-01,   5.66377000e-01,
          5.51512000e-01,   5.52923000e-01,   5.57018000e-01],
       [  2.98013000e-01,  -7.65028000e-03,  -2.32811000e-02,
          3.07960000e-01,   3.12602000e-01,   3.06633000e-01,
          3.16460000e-01,  -1.36456000e-02,   2.46844000e-01,
          3.04799000e-01,  -1.25610000e-02,  -9.20712000e-03,
         -8.56796000e-03,   1.98607000e-03,   3.82157000e-03],
       [  3.65859000e-01,  -7.48346000e-02,  -1.15064000e-01,
          4.00183000e-01,   3.82936000e-01,   3.82595000e-01,
          3.76927000e-01,  -8.22186000e-02,   2.96645000e-01,
          3.88128000e-01,  -7.99636000e-02,  -9.87358000e-02,
         -1.12466000e-01,  -9.98273000e-02,  -9.68927000e-02],
       [ -9.37148000e-03,   3.60113000e-01,   4.43448000e-01,
         -7.91685000e-03,  -1.44240000e-02,   3.28721000e-02,
         -5.84508000e-04,   4.32181000e-01,  -6.86684000e-03,
          7.33296000e-03,   4.15355000e-01,   4.22542000e-01,
          4.09812000e-01,   4.63949000e-01,   4.59877000e-01],
       [  5.99695000e-01,   5.82567000e-03,   2.00375000e-02,
          6.23339000e-01,   6.27463000e-01,   6.18225000e-01,
          6.46408000e-01,   2.40243000e-02,   5.03365000e-01,
          6.37536000e-01,   1.70065000e-02,   1.07638000e-02,
          7.66832000e-03,   3.15754000e-02,   8.23876000e-05],
       [ -3.07969000e-04,   2.74125000e-01,   3.15739000e-01,
         -2.54004000e-02,  -4.83835000e-02,  -2.77834000e-02,
         -2.86206000e-02,   3.27560000e-01,  -6.99667000e-03,
         -2.38075000e-02,   3.42171000e-01,   3.46789000e-01,
          3.26242000e-01,   3.04439000e-01,   3.53769000e-01],
       [  7.28510000e-02,   2.55890000e-01,   2.80280000e-01,
          8.03866000e-02,   9.69812000e-02,   8.51930000e-02,
          8.05826000e-02,   3.15382000e-01,   6.11170000e-02,
          6.85091000e-02,   3.21162000e-01,   2.85870000e-01,
          2.89914000e-01,   2.95880000e-01,   2.76949000e-01],
       [  7.77194000e-02,   3.62860000e-01,   4.45740000e-01,
          8.73820000e-02,   8.70558000e-02,   1.08332000e-01,
          1.21860000e-01,   4.53079000e-01,   7.65831000e-02,
          8.84698000e-02,   4.22669000e-01,   4.40995000e-01,
          4.64719000e-01,   4.73536000e-01,   4.35081000e-01],
       [  3.53605000e-01,   9.97475000e-02,   1.29357000e-01,
          3.82858000e-01,   3.54827000e-01,   3.71830000e-01,
          3.73910000e-01,   1.22202000e-01,   2.88347000e-01,
          3.54923000e-01,   1.35949000e-01,   1.20212000e-01,
          1.10296000e-01,   1.00215000e-01,   9.97603000e-02],
       [ -1.50924000e-02,   2.63741000e-01,   3.05744000e-01,
         -6.60809000e-03,   8.13357000e-03,  -1.22735000e-01,
          7.88794000e-03,   3.06299000e-01,  -2.52105000e-04,
          6.39410000e-03,   2.87691000e-01,   3.58888000e-01,
          3.39875000e-01,   3.63744000e-01,   3.67565000e-01],
       [ -2.98269000e-02,   2.88764000e-01,   3.49127000e-01,
         -2.34874000e-02,  -1.09503000e-02,  -2.99691000e-02,
         -2.59696000e-02,   3.57171000e-01,  -1.65084000e-02,
         -2.04130000e-02,   3.61638000e-01,   3.39184000e-01,
          3.56305000e-01,   3.27946000e-01,   3.55257000e-01],
       [ -3.63559000e-02,   4.01041000e-01,   4.70182000e-01,
         -2.43535000e-02,  -3.15568000e-02,  -4.57663000e-02,
         -5.28870000e-02,   5.26694000e-01,  -1.30382000e-02,
         -5.82190000e-02,   5.17938000e-01,   5.04228000e-01,
          5.22251000e-01,   5.09747000e-01,   5.25493000e-01],
       [  4.06625000e-01,   5.88169000e-02,   5.23246000e-02,
          4.57369000e-01,   4.34374000e-01,   4.52388000e-01,
          4.53600000e-01,   5.34897000e-02,   3.31688000e-01,
          4.35773000e-01,   5.65922000e-02,   6.36120000e-02,
          5.00163000e-02,   6.17278000e-02,   4.91210000e-02],
       [ -2.67663000e-01,   5.17257000e-01,   5.58155000e-01,
         -2.75936000e-01,  -2.87685000e-01,  -3.11489000e-01,
         -3.12920000e-01,   6.09635000e-01,  -2.23530000e-01,
         -2.61998000e-01,   6.39847000e-01,   6.07620000e-01,
          6.06197000e-01,   5.98449000e-01,   6.15610000e-01],
       [  3.68540000e-02,   5.17398000e-01,   6.12172000e-01,
          3.21888000e-02,   9.60634000e-03,   8.27719000e-04,
          1.60307000e-02,   6.25721000e-01,   4.01135000e-03,
          1.75771000e-02,   6.37388000e-01,   6.36084000e-01,
          6.29151000e-01,   6.34905000e-01,   6.04959000e-01],
       [  2.08616000e-01,   3.98010000e-01,   4.84163000e-01,
          2.15345000e-01,   2.31141000e-01,   2.32099000e-01,
          2.33411000e-01,   4.88344000e-01,   1.65377000e-01,
          2.28438000e-01,   4.74110000e-01,   5.02621000e-01,
          4.92219000e-01,   4.77340000e-01,   5.08377000e-01],
       [ -3.22720000e-03,   6.78466000e-01,   8.35841000e-01,
         -6.14328000e-03,   4.60543000e-04,   4.98184000e-04,
          2.78292000e-03,   8.45752000e-01,  -8.08344000e-03,
          4.74270000e-03,   9.34708000e-01,   8.63915000e-01,
          8.84651000e-01,   8.71871000e-01,   8.65056000e-01],
       [  3.21129000e-02,   2.64005000e-01,   3.21095000e-01,
          9.35817000e-03,   2.49811000e-02,   2.85817000e-02,
          2.13276000e-02,   3.36613000e-01,   3.53961000e-02,
          3.41373000e-03,   3.32283000e-01,   3.26185000e-01,
          3.28905000e-01,   3.34830000e-01,   3.29323000e-01],
       [  1.07901000e-01,   1.37530000e-01,   2.03443000e-01,
          1.15241000e-01,   1.10851000e-01,   9.12920000e-02,
          7.92964000e-02,   2.18319000e-01,   7.24521000e-02,
          8.76965000e-02,   2.00012000e-01,   1.74089000e-01,
          2.16369000e-01,   2.18103000e-01,   1.93736000e-01],
       [  3.12001000e-01,   1.59224000e-01,   1.71225000e-01,
          3.16165000e-01,   3.48506000e-01,   3.43921000e-01,
          3.31858000e-01,   1.60152000e-01,   2.62346000e-01,
          3.24303000e-01,   2.07817000e-01,   1.75092000e-01,
          1.99093000e-01,   1.94725000e-01,   1.61459000e-01]])

# Label each word vector with the index of its largest component.
# Kept as an ndarray (not a list) so that `label == i` below is an explicit
# element-wise boolean mask rather than relying on reflected comparison.
label = np.argmax(word_embedding, axis=1)

# Project the word vectors down to 2 dimensions with t-SNE.
fea = TSNE(n_components=2).fit_transform(word_embedding)

# Palette for the scatter plot, one colour per class.
# More named colours: https://www.cnblogs.com/qianblue/p/10783261.html
cValue = ['red', 'yellow', 'green', 'blue', 'orangered', 'steelblue',
          'slateblue', 'tomato', 'peru', 'darkorange', 'deeppink', 'crimson']
cls = np.unique(label)
# One array of 2-D points per distinct label, in the same order as `cls`.
fea_num = [fea[label == i] for i in cls]

# The context manager closes the PDF file even if plotting raises.
with PdfPages('word_embedding_scatter.pdf') as pdf:
    for i, f in enumerate(fea_num):
        # Wrap around the palette so that having more classes than colours
        # reuses colours instead of raising IndexError.
        color = cValue[i % len(cValue)]
        if 0 <= cls[i] < 10:
            # Labels 0-9 are drawn with '+'. No edgecolor is passed here:
            # '+' is an unfilled marker, so there is no face for
            # edgecolor='none' to apply to.
            plt.scatter(f[:, 0], f[:, 1], label=cls[i], marker='+', c=color)
        else:
            plt.scatter(f[:, 0], f[:, 1], label=cls[i], edgecolor='none',
                        c=color)

    # Each scatter call sets label=...; draw the legend so the mapping from
    # colour to class is actually visible in the figure.
    plt.legend()
    plt.tight_layout()
    pdf.savefig()
    plt.show()

在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章