/*
Package post provides the data and behavior for the fundamental atomic
unit of a site: a post. Posts are represented as text files, then
converted to HTML and other formats.
*/
package post

import (
	"adammathes.com/snkt/config"
	"adammathes.com/snkt/render"
	"adammathes.com/snkt/text"
	"adammathes.com/snkt/vlog"
	"github.com/microcosm-cc/bluemonday"
	"github.com/russross/blackfriday"
	"github.com/rwcarlsen/goexif/exif"
	"io/ioutil"
	"log"
	"os"
	"path"
	"path/filepath"
	"strconv"
	"strings"
	"time"
)

// Template is the name of the template used to render an individual post.
var Template = "post"

// Post is a single entry of the site: the parsed representation of a
// source file along with its derived metadata and rendered content.
type Post struct {
	// Representations of the entire post text
	Raw      []byte
	Unparsed string

	// Metadata
	Meta       map[string]string
	SourceFile string
	Title      string `json:"title"`
	Permalink  string `json:"permalink"`
	Time       time.Time
	Year       int
	Month      time.Month
	Day        int
	InFuture   bool
	WordCount  int
	Tags       []string
	Urls       []string
	Imgs       []string

	// Content text -- raw, unprocessed, unfiltered markdown
	Text string

	// Content text -- processed into HTML via markdown and other filters
	Content string

	// Content with sources and references resolved to absolute URLs
	AbsoluteContent string

	// AbsoluteContent with sanitizing for RSS feeds
	SafeContent string

	// Content HTML tags removed
	PlainText string

	// Post following chronologically (later)
	Next *Post
	// Post preceding chronologically (earlier)
	Prev *Post

	// Precomputed dates as strings
	Date    string
	RssDate string

	FileInfo    os.FileInfo
	Extension   string
	ContentType string

	Site sitemeta
}

type sitemeta interface {
	GetURL() string
	GetTitle() string
}

// Posts is a collection of posts; it implements sort.Interface, ordering
// posts chronologically (oldest first).
type Posts []*Post

func (posts Posts) Len() int {
	return len(posts)
}

func (posts Posts) Less(i, j int) bool {
	return posts[i].Time.Before(posts[j].Time)
}

func (posts Posts) Swap(i, j int) {
	posts[i], posts[j] = posts[j], posts[i]
}
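
// Usage sketch (assumes the caller imports "sort"): sorting a Posts slice
// orders it oldest-first, so the most recent post is the last element.
//
//	sort.Sort(posts)              // oldest first
//	latest := posts[len(posts)-1] // newest post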

// NewPost returns a new Post tied to the site metadata s.
func NewPost(s sitemeta) *Post {
	var p Post
	p.Site = s
	return &p
}
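
// A minimal usage sketch, assuming configuration has been loaded and that
// "site" is a sitemeta implementation and "fi" is an os.FileInfo for a file
// in config.Config.TxtDir (both hypothetical names):
//
//	p := NewPost(site)
//	p.Read(fi)
//	html := p.Render() // rendered bytes via the post template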

/*
Read reads a post from file fi and parses it into the Post struct,
performing any work needed to fully populate the struct.
*/
func (p *Post) Read(fi os.FileInfo) {
	p.Meta = make(map[string]string)
	p.FileInfo = fi
	p.SourceFile = p.FileInfo.Name()
	var err error

	// this is an abomination

	ext := filepath.Ext(fi.Name())
	// ext includes the '.'
	if len(ext) > 1 {
		p.Extension = strings.ToLower(ext[1:])
	}

	// TODO: use MIME type instead of just extension
	switch p.Extension {
	case "bmp", "gif", "jpg", "jpeg", "png", "tiff":
		p.ContentType = "image"
		p.Unparsed = ""
		p.parseExif()
	case "mp4", "mpeg":
		p.ContentType = "video"
		p.Unparsed = ""
		// TODO: parse video headers
	case "mp3":
		p.ContentType = "audio"
		p.Unparsed = ""
		// TODO: mp3/id3 extraction
	default:
		// TODO: sanity check text vs. binary
		p.ContentType = "text"
		p.Raw, err = ioutil.ReadFile(path.Join(config.Config.TxtDir, p.FileInfo.Name()))
		if err != nil {
			log.Println(err)
		}
		p.Unparsed = string(p.Raw)
	}
	p.parse()
	// end abomination
}

func (p *Post) AbsoluteFilePath() string {
	return path.Join(config.Config.TxtDir, p.FileInfo.Name())
}

/*
parseExif attempts to extract metadata (currently the creation time) from
the EXIF data of an image post.
*/
func (p *Post) parseExif() {
	f, err := os.Open(p.AbsoluteFilePath())
	if err != nil {
		vlog.Printf("%v", err)
		return
	}

	x, err := exif.Decode(f)
	if err != nil {
		vlog.Printf("%v", err)
		return
	}

	tm, err := x.DateTime()
	if err != nil {
		vlog.Printf("%v", err)
		return
	}
	p.Time = tm

	// TODO: full exif parsing | metadata propagation but exif is ugh
	p.Meta["Exif"] = x.String()
}

/*
parse parses the metadata prefix from the top of the post file's raw bytes
and puts the rest in the text segment. Meta is a name:value mapping;
title, date, and other metadata are derived from it.
*/
func (p *Post) parse() {
	//
	// fills p.Text and p.Meta (map[string]string)
	//
	p.splitTextMeta()

	//
	// Title
	//
	p.Title = p.Meta["title"]
	// Use filename as backup if we have no explicit title
	if p.Title == "" {
		p.Title = p.SourceFile
	}

	p.parseDates()

	//
	// Content
	//
	p.Content = string(p.Filter([]byte(p.Text)))
	p.AbsoluteContent = render.ResolveURLs(p.Content, p.Site.GetURL())

	policy := bluemonday.UGCPolicy()
	policy.RequireNoFollowOnLinks(false)
	p.SafeContent = policy.Sanitize(p.AbsoluteContent)

	policy = bluemonday.StrictPolicy()
	p.PlainText = policy.Sanitize(p.Content)
	p.PlainText = strings.Replace(p.PlainText, "\n\n", "\n", -1)
	p.PlainText = strings.Replace(p.PlainText, "  ", " ", -1)

	// WordCount
	p.WordCount = len(strings.Split(p.PlainText, " "))

	// Tags
	// TODO: separate tag stuff to other module
	if p.Meta["tags"] != "" {
		tags := strings.Split(p.Meta["tags"], ",")
		for _, tag := range tags {
			p.Tags = append(p.Tags, NormalizeTag(tag))
		}
	}

	// Images and URLs
	p.Urls = render.FindURLs(p.AbsoluteContent)
	p.Imgs = render.FindImgs(p.AbsoluteContent)
}

/*
NormalizeTag trims leading/trailing spaces, lowercases, and replaces
internal spaces with underscores.
*/
func NormalizeTag(tag string) string {
	t := strings.ToLower(strings.TrimSpace(tag))
	return strings.Replace(t, " ", "_", -1)
}
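
// For example, NormalizeTag("  Static Sites ") returns "static_sites".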

/*
splitTextMeta splits p.Unparsed into p.Text and p.Meta (attribute/value pairs).
*/
func (p *Post) splitTextMeta() {
	if p.Unparsed == "" {
		p.Text = ""
		return
	}
	SEPARATOR := ":"
	lines := strings.Split(p.Unparsed, "\n")
	// count header lines directly rather than relying on len(p.Meta),
	// which undercounts when a metadata key is repeated
	headerLines := 0
	for _, line := range lines {
		if !strings.Contains(line, SEPARATOR) {
			break
		}
		splitdex := strings.Index(line, SEPARATOR)
		attr := strings.ToLower(strings.TrimSpace(line[0:splitdex]))
		value := strings.TrimSpace(line[splitdex+1:])
		p.Meta[attr] = value
		headerLines++
	}
	p.Text = strings.Join(lines[headerLines:], "\n")
}
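
// For illustration, a source file beginning like this (hypothetical
// content):
//
//	title: Hello World
//	date: 2017-03-09
//	tags: go, snkt
//
//	Body text in *markdown* follows the first line without a separator.
//
// yields Meta = {"title": "Hello World", "date": "2017-03-09",
// "tags": "go, snkt"}, with everything after the header lines in Text.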

/*
ParseFmt expands date and title format codes in s: %Y/$Y/.Year (year),
%M/$M/.Month (month number), %D/$D/.Day (day), %F/$F/.File (cleaned source
filename), and %T/$T/.Title (cleaned title).
*/
func (p *Post) ParseFmt(s string) string {
	// TODO: document and add strftime-like formats
	s = strings.Replace(s, "%Y", strconv.Itoa(p.Year), -1)
	s = strings.Replace(s, "%M", strconv.Itoa(int(p.Month)), -1)
	s = strings.Replace(s, "%D", strconv.Itoa(p.Day), -1)
	s = strings.Replace(s, "%F", p.CleanFilename(), -1)
	s = strings.Replace(s, "%T", p.CleanTitle(), -1)

	s = strings.Replace(s, "$Y", strconv.Itoa(p.Year), -1)
	s = strings.Replace(s, "$M", strconv.Itoa(int(p.Month)), -1)
	s = strings.Replace(s, "$D", strconv.Itoa(p.Day), -1)
	s = strings.Replace(s, "$F", p.CleanFilename(), -1)
	s = strings.Replace(s, "$T", p.CleanTitle(), -1)

	s = strings.Replace(s, ".File", p.CleanFilename(), -1)
	s = strings.Replace(s, ".Title", p.CleanTitle(), -1)
	s = strings.Replace(s, ".Year", strconv.Itoa(p.Year), -1)
	s = strings.Replace(s, ".Month", strconv.Itoa(int(p.Month)), -1)
	s = strings.Replace(s, ".Day", strconv.Itoa(p.Day), -1)

	return s
}
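
// Illustrative expansion, assuming text.SanitizeFilename leaves
// "hello-world" unchanged: for a post dated 2017-03-09 with source file
// "hello-world.txt",
//
//	p.ParseFmt("/%Y/%M/%D/%F/")
//
// yields "/2017/3/9/hello-world/" (months and days are not zero-padded).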

func (p *Post) parseDates() {

	// the time may already be set (e.g. from EXIF data); if so, skip parsing
	if (p.Time != time.Time{}) {
		p.fillDates()
		return
	}

	//
	// Dates
	//
	// we only deal with yyyy-mm-dd [some legacy dates from my archives have times tacked on]
	// TODO: recover from empty dates/titles
	// TODO: probably should actually use times when present and clean up my archives
	var date_str = ""
	ds := strings.Fields(p.Meta["date"])
	if len(ds) > 0 {
		date_str = ds[0]
	}

	if date_str == "" {
		p.Time = p.FileInfo.ModTime()
		vlog.Printf("no date field in post %s, using file modification time\n", p.SourceFile)
	} else {
		var err error
		p.Time, err = time.ParseInLocation("2006-1-2", date_str, time.Local)
		if err != nil {
			// fallback is to use file modtime
			// should use create time but that doesn't seem to be in stdlib
			// TODO: figure out how to use file birth time
			vlog.Printf("no valid date parsed for post %s, using file modification time\n", p.SourceFile)
			p.Time = p.FileInfo.ModTime()
		}
	}
	p.fillDates()
}

/*
fillDates derives the remaining date fields (Year, Month, Day, Date,
RssDate, InFuture) and the Permalink from p.Time.
*/
func (p *Post) fillDates() {
	p.Year, p.Month, p.Day = p.Time.Date()
	/* golang date format refresher
	      1 2  3  4  5  7     6
	Mon Jan 2 15:04:05 MST 2006 */

	p.Date = p.Time.Format("January 2, 2006")
	p.RssDate = p.Time.Format(time.RFC822)
	p.InFuture = time.Now().Before(p.Time)
	p.Permalink = p.GenPermalink()
}
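
// For a p.Time of March 9, 2017, Date becomes "March 9, 2017" and RssDate
// uses the RFC 822 layout (e.g. "09 Mar 17 00:00 UTC", depending on the
// stored time and zone).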

func (p *Post) CleanFilename() string {
	return text.SanitizeFilename(text.RemoveExt(p.SourceFile))
}

func (p *Post) CleanTitle() string {
	return text.SanitizeFilename(p.Title)
}

/*
GenPermalink generates the permalink for the post given the PermalinkFmt format specified in the configuration file.
*/
func (p *Post) GenPermalink() string {
	pl := config.Config.PermalinkFmt
	return p.ParseFmt(pl)
}

/*
Target returns the file system path where the rendered output file for the post should be written.
*/
func (p Post) Target() string {
	pf := config.Config.PostFileFmt
	return path.Join(config.Config.HtmlDir, p.ParseFmt(pf))
}
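
// Sketch with hypothetical configuration values: if PermalinkFmt is
// "/%Y/%M/%F/" and PostFileFmt is "%F.html", the post from the example
// above gets a Permalink of "/2017/3/hello-world/" and its output is
// written under HtmlDir as "hello-world.html".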

/*
Render returns the post rendered as HTML via the post template with Post and Site as context.
*/
func (p Post) Render() []byte {
	data := struct {
		Post interface{}
		Site interface{}
	}{&p, &p.Site}
	return render.Render(Template, data)
}

/*
Filter runs the text through filters defined by render.Filter and markdown, returning text suitable for HTML output.
*/
func (p *Post) Filter(txt []byte) []byte {
	txt = render.Filter(txt)
	txt = blackfriday.MarkdownCommon(txt)
	return txt
}

/*
Limit returns at most the first limit posts. If limit exceeds the length
of the slice, the whole slice is returned.
*/
func (posts Posts) Limit(limit int) Posts {
	if len(posts) < limit {
		return posts
	}
	return posts[0:limit]
}

/*
ContainsTag returns true if Post `p` has `tag` in its set of tags.
*/
func (p *Post) ContainsTag(tag string) bool {
	for _, t := range p.Tags {
		if t == tag {
			return true
		}
	}
	return false
}

/*
FirstWords returns the first words of the plain-text version of the post,
up to maxWords words.
*/
func (p *Post) FirstWords(maxWords int) string {
	words := strings.Split(p.PlainText, " ")
	if len(words) <= maxWords {
		maxWords = len(words)
	}

	return strings.Join(words[0:maxWords], " ")
}

/*
FirstChars returns the leading words of the plain-text version of the post,
totaling at most maxChars characters.
*/
func (p *Post) FirstChars(maxChars int) string {
	s := ""
	words := strings.Split(p.PlainText, " ")
	for _, word := range words {
		if len(s)+len(word) > maxChars {
			break
		}
		s = s + " " + word
	}
	// trim the leading space introduced by the concatenation above
	return strings.TrimSpace(s)
}
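
// Illustrative example (hypothetical content): for a post whose PlainText
// is "the quick brown fox", FirstWords(2) returns "the quick", and
// FirstChars(9) likewise stops after the first two words.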