package main
import (
"bufio"
"encoding/json"
"fmt"
"github.com/grokify/html-strip-tags-go"
"github.com/mmcdole/gofeed"
"github.com/writeas/go-strip-markdown"
"golang.org/x/net/proxy"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"path"
"strconv"
"strings"
"time"
)
// makeTorifiedClient builds an *http.Client that routes all connections
// through the local Tor SOCKS5 proxy (127.0.0.1:9050).
//
// The CheckRedirect hook copies the redirect target's Path into URL.Opaque
// so redirected requests keep the path exactly as received.
// The process exits if the proxy dialer cannot be constructed.
func makeTorifiedClient() *http.Client {
	torDialer, err := proxy.SOCKS5("tcp", "127.0.0.1:9050", nil, proxy.Direct)
	if err != nil {
		log.Fatalf("Could not connect to Tor Proxy: %v", err)
	}
	client := &http.Client{
		Transport: &http.Transport{
			Dial: torDialer.Dial,
		},
		// Guard against hung Tor circuits: without a timeout a stalled
		// request blocks the whole (sequential) update run indefinitely.
		Timeout: 2 * time.Minute,
		CheckRedirect: func(r *http.Request, via []*http.Request) error {
			r.URL.Opaque = r.URL.Path
			return nil
		},
	}
	return client
}
func fetch ( url string , cachepath string ) {
fmt . Printf ( "Fetching [%v]\n" , url )
fp := gofeed . NewParser ( )
2019-06-25 23:12:59 +00:00
fp . Client = makeTorifiedClient ( )
2019-06-25 22:26:21 +00:00
feed , err := fp . ParseURL ( url )
fmt . Printf ( "Feed %v %v\n" , feed , err )
cache , _ := json . Marshal ( feed )
ioutil . WriteFile ( cachepath , cache , 0644 )
}
// report scans every subdirectory of the current directory for a cached
// "latest" feed file and returns a map from directory name to the decoded
// feed. Directories without a readable cache map to a zero-value Feed.
func report() map[string]gofeed.Feed {
	feedMap := make(map[string]gofeed.Feed)
	items, err := ioutil.ReadDir(".")
	if err != nil {
		log.Printf("report: %v", err)
		return feedMap
	}
	for _, item := range items {
		if !item.IsDir() {
			continue
		}
		cachepath := path.Join(".", item.Name(), "latest")
		var feed gofeed.Feed
		if data, err := ioutil.ReadFile(cachepath); err == nil {
			if err := json.Unmarshal(data, &feed); err != nil {
				// A corrupt cache still yields a zero-value entry so the
				// report template can reference the feed without crashing.
				log.Printf("report: bad cache %v: %v", cachepath, err)
			}
		}
		feedMap[item.Name()] = feed
	}
	return feedMap
}
func download ( url string , cachepath string ) {
fmt . Printf ( "Fetching [%v]\n" , url )
2019-06-25 23:12:59 +00:00
client := makeTorifiedClient ( )
2019-06-25 22:26:21 +00:00
2019-06-25 23:20:52 +00:00
req , err := http . NewRequest ( "GET" , url , nil )
if err != nil {
log . Fatalln ( err )
}
// Set User Agent to be Tor Browser (it likely won't be hard for a site to determine these requests are partially automated, but a little more obfuscation is never a bad idea)
req . Header . Set ( "User-Agent" , "Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0" )
resp , err := client . Do ( req )
2019-06-25 22:26:21 +00:00
if err == nil {
defer resp . Body . Close ( )
body , _ := ioutil . ReadAll ( resp . Body )
ioutil . WriteFile ( cachepath , body , 0644 )
}
}
// update walks every subdirectory of the current directory and refreshes
// its cached feed (described by a "feedinfo" file) and images (described
// by an "images" file) whenever the per-entry refresh interval, given in
// minutes, has elapsed since the cache was last written.
func update() {
	items, err := ioutil.ReadDir(".")
	if err != nil {
		log.Printf("update: %v", err)
		return
	}
	for _, item := range items {
		if !item.IsDir() {
			continue
		}
		subitems, _ := ioutil.ReadDir(item.Name())
		for _, subitem := range subitems {
			switch subitem.Name() {
			case "feedinfo":
				updateFeed(item.Name(), subitem.Name())
			case "images":
				updateImages(item.Name(), subitem.Name())
			}
		}
	}
}

// stale reports whether cachepath is missing or was last modified more
// than cron minutes ago.
func stale(cachepath string, cron int) bool {
	info, err := os.Stat(cachepath)
	if err != nil {
		return true // first time: no cache yet
	}
	return time.Since(info.ModTime()) > time.Duration(cron)*time.Minute
}

// updateFeed re-fetches the feed described by <dir>/feedinfo, whose
// content is "<url> <minutes>", into <dir>/latest when the cache is stale.
func updateFeed(dir, name string) {
	filepath := path.Join(".", dir, name)
	fmt.Println("Processing: " + filepath)
	rawfeedinfo, err := ioutil.ReadFile(filepath)
	if err != nil {
		log.Printf("update: %v: %v", filepath, err)
		return
	}
	feedinfo := strings.Split(strings.TrimSpace(string(rawfeedinfo)), " ")
	if len(feedinfo) < 2 {
		// Previously this indexed feedinfo[1] unchecked and panicked on
		// malformed files.
		log.Printf("update: malformed feedinfo %v", filepath)
		return
	}
	// An unparsable interval yields 0, i.e. fetch on every run.
	cron, _ := strconv.Atoi(feedinfo[1])
	cachepath := path.Join(".", dir, "latest")
	if stale(cachepath, cron) {
		fetch(feedinfo[0], cachepath)
	}
}

// updateImages re-downloads each image listed in <dir>/images (one
// "<url> <minutes>" per line) into <dir>/latest-<basename> when stale.
func updateImages(dir, name string) {
	filepath := path.Join(".", dir, name)
	file, err := os.Open(filepath)
	if err != nil {
		log.Fatal(err)
	}
	// Helper scope means this closes before the next directory is
	// processed (the original deferred inside a loop, leaking handles
	// until update returned).
	defer file.Close()
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		imageinfo := strings.Split(scanner.Text(), " ")
		if len(imageinfo) < 2 {
			// Skip blank/malformed lines instead of panicking on [1].
			continue
		}
		uri, err := url.Parse(imageinfo[0])
		if err != nil {
			log.Printf("update: bad image URL %q: %v", imageinfo[0], err)
			continue
		}
		cron, _ := strconv.Atoi(imageinfo[1])
		cachepath := path.Join(".", dir, "latest-"+path.Base(uri.Path))
		if stale(cachepath, cron) {
			download(imageinfo[0], cachepath)
		}
	}
	if err := scanner.Err(); err != nil {
		log.Printf("update: reading %v: %v", filepath, err)
	}
}
// processItems renders every feed item that falls inside the dateCheck
// window (a zero duration disables filtering and prints everything),
// and emits a "Nothing new" bullet when no item qualified.
func processItems(format []string, items []*gofeed.Item, dateCheck time.Duration) {
	printed := 0
	for _, entry := range items {
		switch {
		case dateCheck == 0:
			processItem(format, *entry)
			printed = 100
		case entry.PublishedParsed != nil:
			if time.Since(*entry.PublishedParsed) < dateCheck {
				processItem(format, *entry)
				printed++
			}
		case entry.UpdatedParsed != nil:
			// No publication date: fall back to the update timestamp.
			if time.Since(*entry.UpdatedParsed) < dateCheck {
				processItem(format, *entry)
				printed++
			}
		}
	}
	if printed == 0 {
		fmt.Printf("* Nothing new this %v From %v\n", format[1], format[0])
	}
}
// stripString removes html, then potential markdown characters, and then some additional potential markdown
func stripString ( input string ) string {
return strings . Replace ( stripmd . Strip ( strip . StripTags ( input ) ) , "`" , "" , - 1 )
}
// processItem prints one markdown bullet for item, emitting the fields
// named in format[2:] ("Title", "Link", "Description") in the order given.
func processItem(format []string, item gofeed.Item) {
	fmt.Printf("* ")
	for idx := 2; idx < len(format); idx++ {
		field := format[idx]
		if field == "Title" {
			fmt.Printf("%v ", stripString(item.Title))
		} else if field == "Link" {
			fmt.Printf("[%v](%v)", stripString(item.Link), stripString(item.Link))
		} else if field == "Description" {
			fmt.Printf("\n * %v <hr/>", strings.Replace(stripString(item.Description), "\n", "", -1))
		}
	}
	fmt.Printf("\n")
}
// main dispatches on the first CLI argument: "update" refreshes all
// cached feeds/images, "report <template>" renders a report template
// against the cached feeds.
func main() {
	if len(os.Args) >= 2 {
		switch os.Args[1] {
		case "update":
			update()
		case "report":
			if len(os.Args) == 3 {
				runReport(os.Args[2])
			}
		}
	}
	os.Exit(0)
}

// runReport reads the report template at templatePath line by line and
// renders it. Template syntax per line:
//
//	%...              comment, ignored
//	#... or <...      passed through verbatim (markdown/html)
//	<feed> ALL|DAY|WEEK|<index> <fields...>   render feed items
//
// Any other line prints a blank line.
func runReport(templatePath string) {
	feeds := report()
	file, err := os.Open(templatePath)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Text()
		reportLine := strings.Split(line, " ")
		switch {
		case strings.HasPrefix(reportLine[0], "%"):
			// Ignore, this is a comment
		case strings.HasPrefix(reportLine[0], "#") || strings.HasPrefix(reportLine[0], "<"):
			fmt.Printf("%v\n", strings.TrimSpace(line))
		case len(reportLine) > 2:
			feed, exists := feeds[reportLine[0]]
			if !exists {
				log.Fatalf("Report Template Contains Non-Existent Feed %v\n", reportLine[0])
			}
			switch reportLine[1] {
			case "ALL":
				processItems(reportLine, feed.Items, 0)
			case "DAY": // Only output entries from the last Day
				processItems(reportLine, feed.Items, time.Hour*24)
			case "WEEK": // Only output entries from the last Week
				processItems(reportLine, feed.Items, time.Hour*24*7)
			default:
				// Non-numeric strings parse to 0; bounds-check so a bad
				// index fails cleanly instead of panicking.
				index, _ := strconv.Atoi(reportLine[1])
				if index < 0 || index >= len(feed.Items) {
					log.Fatalf("Report Template Item Index %v Out Of Range For Feed %v\n", reportLine[1], reportLine[0])
				}
				processItem(reportLine, *feed.Items[index])
			}
		default:
			fmt.Printf("\n")
		}
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}