用golang在服務端將html轉爲word(docx)嘗試

對於存儲在數據庫表中的html內容,在頁面上有很多前端庫支持解析,小程序端也有專門的解析組件。可是要導出到word,不借助前端該怎麼做呢?

我用goquery+github.com/unidoc/unioffice來嘗試一下。

將engineercms裏的日誌嘗試導出來。

設代日誌在線查閱: 

和導出的word格式對比一下吧:

 

思路是這樣的:先取出內容,用goquery按<p>標籤分出段落,一方面提取純文本(txt),另一方面提取html,後者含有<img>標籤裏的src圖片地址。

然後循環各個段落,在每段文字後面插入該段裏的圖片,只能用這樣的笨辦法了。要把圖片完全按原位置嵌入到文字中,就麻煩點了。

// DiaryContent holds one <p> element of a diary's HTML content in two forms,
// so the plain text and the embedded markup can be processed separately.
type DiaryContent struct {
	// Txt is the text content of the paragraph with all tags stripped.
	Txt string
	// Html is the inner HTML of the paragraph; it still contains any <img>
	// tags, whose src attributes point at the pictures to export.
	Html string
}

// HtmlToDoc exports one page of diaries as a Word (.docx) file written to
// the static/ directory and replies with JSON.
//
// The project id is read from the app config key "wxdiaryprojectid". The
// request parameters "limit" (page size) and "page" (1-based page number;
// values <= 1 select the first page) choose which diaries are exported.
//
// For every diary the title and date are written as "Title" and "Heading1"
// paragraphs. The HTML content is then split on <p> elements: each
// paragraph's text becomes one document paragraph, followed by one extra
// paragraph per <img> found inside it.
//
// Response:
//   - success: {"info": "SUCCESS", "filename": "<unix-nano>.docx"}
//   - failure: {"info": "ERROR", "msg": "<error text>"}
//
// A diary that cannot be loaded or parsed, and an image that cannot be read
// or embedded, is logged and skipped so that a single bad record does not
// abort the export (or, as log.Fatalf used to, the whole server process).
func (c *DiaryController) HtmlToDoc() {
	// fail logs err and reports it to the client instead of a false SUCCESS.
	fail := func(err error) {
		beego.Error(err)
		c.Data["json"] = map[string]interface{}{"info": "ERROR", "msg": err.Error()}
		c.ServeJSON()
	}

	// Project whose diaries are exported, e.g. "26159" (original note:
	// 25002 Pearl River Delta design-representative diary, id 26159).
	id := beego.AppConfig.String("wxdiaryprojectid")
	// Convert the id to a 64-bit integer.
	idNum, err := strconv.ParseInt(id, 10, 64)
	if err != nil {
		fail(err)
		return
	}

	// Paging is best effort, as before: a missing or malformed value is
	// logged and whatever Atoi returned (0 for non-numeric input) is used.
	limit1, err := strconv.Atoi(c.Input().Get("limit"))
	if err != nil {
		beego.Error(err)
	}
	page1, err := strconv.Atoi(c.Input().Get("page"))
	if err != nil {
		beego.Error(err)
	}

	offset := 0
	if page1 > 1 {
		offset = (page1 - 1) * limit1
	}

	diaries, err := models.GetWxDiaries2(idNum, limit1, offset)
	if err != nil {
		fail(err)
		return
	}

	// Bounding box for embedded pictures. The explicit conversion keeps the
	// values typed as Distance regardless of how measurement.Inch is declared.
	// NOTE(review): 8in assumes a page with at least that much usable height.
	maxImgWidth := measurement.Distance(5.5 * measurement.Inch)
	maxImgHeight := measurement.Distance(8 * measurement.Inch)

	doc := document.New()

	for _, v := range diaries {
		diary, err := models.GetDiary(v.Diary.Id)
		if err != nil {
			// The returned diary is not usable on error; skip it.
			beego.Error(err)
			continue
		}

		para := doc.AddParagraph()
		run := para.AddRun()
		para.SetStyle("Title")
		run.AddText(diary.Title)

		para = doc.AddParagraph()
		para.SetStyle("Heading1")
		run = para.AddRun()
		run.AddText(diary.Diarydate)

		// Split the diary into paragraphs on its <p> tags.
		paragraphs, err := diaryParagraphs(strings.NewReader(string(diary.Content)))
		if err != nil {
			beego.Error(err)
			continue
		}

		for _, w := range paragraphs {
			para = doc.AddParagraph()
			para.Properties().SetFirstLineIndent(0.354331 * measurement.Inch)
			run = para.AddRun()
			run.AddText(w.Txt)

			// Look for <img> tags inside this paragraph.
			imgs, err := diaryParagraphImages(strings.NewReader(w.Html))
			if err != nil {
				beego.Error(err)
				continue
			}

			for _, x := range imgs {
				img, err := common.ImageFromFile(x.Src)
				if err != nil {
					log.Printf("unable to create image: %s", err)
					continue
				}
				ref, err := doc.AddImage(img)
				if err != nil {
					log.Printf("unable to add image to document: %s", err)
					continue
				}

				// Each picture goes into its own paragraph after the text.
				para = doc.AddParagraph()
				run = para.AddRun()
				inl, err := run.AddDrawingInline(ref)
				if err != nil {
					log.Printf("unable to add inline image: %s", err)
					continue
				}

				// Scale to the maximum width while keeping the aspect ratio;
				// fall back to the former square box if the pixel size is
				// unknown.
				width, height := maxImgWidth, maxImgWidth
				if img.Size.X > 0 && img.Size.Y > 0 {
					height = width * measurement.Distance(img.Size.Y) / measurement.Distance(img.Size.X)
				}
				if height > maxImgHeight {
					width = width * maxImgHeight / height
					height = maxImgHeight
				}
				inl.SetSize(width, height)
			}
		}
	}

	newname := strconv.FormatInt(time.Now().UnixNano(), 10) + ".docx"
	if err := doc.SaveToFile("static/" + newname); err != nil {
		fail(err)
		return
	}
	c.Data["json"] = map[string]interface{}{"info": "SUCCESS", "filename": newname}
	c.ServeJSON()
}

// diaryParagraphs parses the HTML read from r and returns one DiaryContent
// per <p> element, in document order, holding its text and its inner HTML.
func diaryParagraphs(r io.Reader) ([]DiaryContent, error) {
	parsed, err := goquery.NewDocumentFromReader(r)
	if err != nil {
		return nil, err
	}

	var paragraphs []DiaryContent
	parsed.Find("p").Each(func(_ int, s *goquery.Selection) {
		inner, err := s.Html()
		if err != nil {
			// Keep the paragraph text even if its markup cannot be rendered.
			beego.Error(err)
		}
		paragraphs = append(paragraphs, DiaryContent{Txt: s.Text(), Html: inner})
	})
	return paragraphs, nil
}

// diaryParagraphImages parses the HTML fragment read from r and returns one
// Img per <img> element that has a non-empty src attribute. Src is the src
// with every "/attachment/" rewritten to "attachment/" (so it can be opened
// as a path relative to the working directory); Name is the last element of
// the original src.
func diaryParagraphImages(r io.Reader) ([]Img, error) {
	fragment, err := goquery.NewDocumentFromReader(r)
	if err != nil {
		return nil, err
	}

	var imgs []Img
	fragment.Find("img").Each(func(_ int, s *goquery.Selection) {
		src, ok := s.Attr("src")
		if !ok || src == "" {
			// Nothing to load for an <img> without a source.
			return
		}
		imgs = append(imgs, Img{
			Src:  strings.Replace(src, "/attachment/", "attachment/", -1),
			Name: path.Base(src),
		})
	})
	return imgs, nil
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章