Stanford CoreNLP CRFClassifier: model loading and serialization

Source code location: ie.crf.CRFClassifier

Model loading

loadClassifier(String loadPath, Properties props)


/**
 * Loads a classifier from the file, classpath resource, or URL specified by loadPath. If loadPath ends in
 * .gz, uses a GZIPInputStream.
 */
// (author's note) both the segmenter and the NER model load through here
public void loadClassifier(String loadPath, Properties props) throws ClassCastException, IOException, ClassNotFoundException {
  InputStream is = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(loadPath);
  Timing t = new Timing();
  loadClassifier(is, props);
  is.close();
  t.done(log, "Loading classifier from " + loadPath);
}
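
For reference, a minimal usage sketch of loading a model by path. The model path is a placeholder (the standard English 3-class NER model from the CoreNLP models jar); any CRF .ser.gz model on the classpath, file system, or a URL works, since loadClassifier resolves all three via IOUtils.getInputStreamFromURLOrClasspathOrFileSystem. The class name LoadCrfByPathExample is just for illustration.

import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreLabel;

public class LoadCrfByPathExample {
  public static void main(String[] args) throws Exception {
    // Assumed model path from the English NER models jar on the classpath.
    String modelPath = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";

    // The static factory builds a CRFClassifier and loads the model through
    // the loadClassifier overloads shown above.
    CRFClassifier<CoreLabel> classifier = CRFClassifier.getClassifier(modelPath);

    System.out.println(classifier.classifyToString("Stanford University is located in California."));
  }
}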


loadClassifier(ObjectInputStream ois, Properties props)

/**
   * Loads a classifier from the specified InputStream. This version works
   * quietly (unless VERBOSE is true). If props is non-null then any properties
   * it specifies override those in the serialized file. However, only some
   * properties are sensible to change (you shouldn't change how features are
   * defined).
   * <p>
   * <i>Note:</i> This method does not close the ObjectInputStream. (But earlier
   * versions of the code used to, so beware....)
   */
  @Override
  @SuppressWarnings( { "unchecked" })
  // can't have right types in deserialization
  // (author's note) both the segmenter and the NER model load through here
  public void loadClassifier(ObjectInputStream ois, Properties props) throws ClassCastException, IOException,
      ClassNotFoundException {
    Object o = ois.readObject();
    // TODO: when we next break serialization, get rid of this fork and only read the List<Index> (i.e., keep first case)
    if (o instanceof List) {
      labelIndices = (List<Index<CRFLabel>>) o;
    } else {
      Index<CRFLabel>[] indexArray = (Index<CRFLabel>[]) o;
      labelIndices = new ArrayList<>(indexArray.length);
      Collections.addAll(labelIndices, indexArray);
    }
    classIndex = (Index<String>) ois.readObject();
    featureIndex = (Index<String>) ois.readObject();
    flags = (SeqClassifierFlags) ois.readObject();
    if (flags.useEmbedding) {
      embeddings = (Map<String, double[]>) ois.readObject();
    }
    Object featureFactory = ois.readObject();
    if (featureFactory instanceof List) {
      featureFactories = ErasureUtils.uncheckedCast(featureFactory);
//      int i = 0;
//      for (FeatureFactory ff : featureFactories) { // XXXX
//        System.err.println("List FF #" + i + ": " + ((NERFeatureFactory) ff).describeDistsimLexicon()); // XXXX
//        i++;
//      }
    } else if (featureFactory instanceof FeatureFactory) {
      featureFactories = Generics.newArrayList();
      featureFactories.add((FeatureFactory) featureFactory);
//      System.err.println(((NERFeatureFactory) featureFactory).describeDistsimLexicon()); // XXXX
    } else if (featureFactory instanceof Integer) {
      // this is the current format (2014) since writing list didn't work (see note in serializeClassifier).
      int size = (Integer) featureFactory;
      featureFactories = Generics.newArrayList(size);
      for (int i = 0; i < size; ++i) {
        featureFactory = ois.readObject();
        if (!(featureFactory instanceof FeatureFactory)) {
          throw new RuntimeIOException("Should have FeatureFactory but got " + featureFactory.getClass());
        }
//        System.err.println("FF #" + i + ": " + ((NERFeatureFactory) featureFactory).describeDistsimLexicon()); // XXXX
        featureFactories.add((FeatureFactory) featureFactory);
      }
    }

    // log.info("properties passed into CRF's loadClassifier are:" + props);
    if (props != null) {
      flags.setProperties(props, false);
    }

    windowSize = ois.readInt();
    weights = (double[][]) ois.readObject();

    // WordShapeClassifier.setKnownLowerCaseWords((Set) ois.readObject());
    Set<String> lcWords = (Set<String>) ois.readObject();
    if (lcWords instanceof MaxSizeConcurrentHashSet) {
      knownLCWords = (MaxSizeConcurrentHashSet<String>) lcWords;
    } else {
      knownLCWords = new MaxSizeConcurrentHashSet<>(lcWords);
    }

    reinit();

    if (flags.labelDictionaryCutoff > 0) {
      labelDictionary = (LabelDictionary) ois.readObject();
    }

    if (VERBOSE) {
      log.info("windowSize=" + windowSize);
      log.info("flags=\n" + flags);
    }
  }
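
Two details are worth noting: the read order here must exactly mirror the write order in serializeClassifier below, and props is applied only after the serialized SeqClassifierFlags have been read, so passed-in properties override the stored flags (per the Javadoc, only options that do not change how features are defined should be overridden). Below is a rough sketch of driving this overload directly; the model path is a placeholder, and constructing the instance via the Properties-based constructor is an assumption about the usual setup.

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.ObjectInputStream;
import java.util.Properties;
import java.util.zip.GZIPInputStream;

import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreLabel;

public class LoadCrfFromStreamExample {
  public static void main(String[] args) throws Exception {
    String modelPath = "my-crf-model.ser.gz";  // hypothetical local model file

    // Properties given here override the SeqClassifierFlags stored in the model.
    Properties props = new Properties();

    CRFClassifier<CoreLabel> classifier = new CRFClassifier<>(props);
    try (ObjectInputStream ois = new ObjectInputStream(
        new GZIPInputStream(new BufferedInputStream(new FileInputStream(modelPath))))) {
      // The caller owns the stream: loadClassifier(ObjectInputStream, Properties)
      // deliberately does not close it.
      classifier.loadClassifier(ois, props);
    }
  }
}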

Model serialization

/**
 * Serialize the classifier to the given ObjectOutputStream.
 * <br>
 * (Since the classifier is a processor, we don't want to serialize the
 * whole classifier but just the data that represents a classifier model.)
 */
@Override
public void serializeClassifier(ObjectOutputStream oos) {
  try {
    oos.writeObject(labelIndices);
    oos.writeObject(classIndex);
    oos.writeObject(featureIndex);
    oos.writeObject(flags);
    if (flags.useEmbedding) {
      oos.writeObject(embeddings);
    }
    // For some reason, writing out the array of FeatureFactory
    // objects doesn't seem to work.  The resulting classifier
    // doesn't have the lexicon (distsim object) correctly saved.  So now custom write the list
    oos.writeObject(featureFactories.size());
    for (FeatureFactory ff : featureFactories) {
      oos.writeObject(ff);
    }
    oos.writeInt(windowSize);
    oos.writeObject(weights);
    // oos.writeObject(WordShapeClassifier.getKnownLowerCaseWords());

    oos.writeObject(knownLCWords);
    if (labelDictionary != null) {
      oos.writeObject(labelDictionary);
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}
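
For completeness, a small round-trip sketch using the path-based convenience method serializeClassifier(String), which (as far as I can tell) opens the output stream, gzipped for a .gz path, and delegates to the ObjectOutputStream version above. Both paths are placeholders.

import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreLabel;

public class SerializeCrfExample {
  public static void main(String[] args) throws Exception {
    // Load an existing model (placeholder path), then write it back out.
    CRFClassifier<CoreLabel> classifier = CRFClassifier.getClassifier(
        "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz");

    // serializeClassifier(String) ends up calling
    // serializeClassifier(ObjectOutputStream) shown above.
    classifier.serializeClassifier("copy-of-model.ser.gz");
  }
}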



