Add Content Addressing to Timeline
continuous-integration/drone/push Build is passing Details
continuous-integration/drone/pr Build is passing Details

In order to implement features like quoting/reply-to we need a way
to reference messages that have been previously sent in a way that
is compatible across domains (i.e. p2p and groups).

For groups we could use the signature as a universal identifier, but we have
no such analog in p2p. (Note that adding a signature to p2p would compromise the
deniability properties of that protocol, and as such it wasn't considered.)

This PR creates a new index in Timeline that allows messages to be looked
up by a content hash of their sender + message body. GetMessagesByHash returns
a list of matching messages that can then be used for a variety of applications,
e.g. reply-to or duplicate detection.

Implementing reply-to would then be as simple as including the
content hash of the replied-to message in the overlay envelope, looking
up that hash in the timeline, and finding the most recent message that
predates the index of the current message.
This commit is contained in:
Sarah Jamie Lewis 2021-07-02 10:50:05 -07:00
parent 73e9a6efe7
commit 4f6cba2900
2 changed files with 93 additions and 1 deletions

View File

@ -1,7 +1,9 @@
package model
import (
"crypto/sha256"
"encoding/base64"
"errors"
"sort"
"sync"
"time"
@ -16,6 +18,22 @@ type Timeline struct {
// a cache to allow quick checks for existing messages...
signatureCache map[string]bool
// a cache to allow looking up messages by content hash
// we need this for features like reply-to message, and other self
// referential applications.
// note: the index stored here is not global, as different peers may have different views of the timeline
// depending on whether they save history and when they last purged their timeline; as such we can't
// simply send the index of the message.
hashCache map[string][]int
}
// LocallyIndexedMessage pairs a Message with its index in the Timeline.
// The index is local to this instance of the timeline: it identifies the
// message's position in this view only and must not be treated as a global
// identifier.
type LocallyIndexedMessage struct {
Message
// LocalIndex is the position of the message in this instance's timeline.
LocalIndex int
}
// Message is a local representation of a given message sent over a group chat channel.
@ -76,11 +94,50 @@ func (t *Timeline) SetMessages(messages []Message) {
defer t.lock.Unlock()
t.init()
t.Messages = messages
for _, message := range t.Messages {
for idx, message := range t.Messages {
t.signatureCache[base64.StdEncoding.EncodeToString(message.Signature)] = true
t.hashCache[t.calculateHash(message)] = append(t.hashCache[t.calculateHash(message)], idx)
}
}
// GetMessagesByHash looks up every message in the timeline whose content hash
// equals contentHash.
//
// On success it returns the matching messages, each paired with its index in
// this local view of the timeline, in the order those indices were recorded.
// If no message matches, it returns a nil slice and an error.
//
// A list is returned because content hashes are not guaranteed to be unique
// for a given peer (the same peer may send the same body more than once). This
// lets callers do things like resolve a reply-to/quote to the last matching
// message seen before the quoting message, or detect duplicate messages from
// a peer.
//
// The timeline lock is held for the duration of the call.
func (t *Timeline) GetMessagesByHash(contentHash string) ([]LocallyIndexedMessage, error) {
	t.lock.Lock()
	defer t.lock.Unlock()
	t.init()

	idxs, exists := t.hashCache[contentHash]
	if !exists {
		return nil, errors.New("cannot find message by hash")
	}

	messages := make([]LocallyIndexedMessage, 0, len(idxs))
	for _, idx := range idxs {
		// Messages is an exported field, so it can be changed without the
		// hash index being rebuilt. Skip indices that no longer point into
		// the slice instead of panicking on an out-of-range access.
		if idx < 0 || idx >= len(t.Messages) {
			continue
		}
		messages = append(messages, LocallyIndexedMessage{LocalIndex: idx, Message: t.Messages[idx]})
	}
	if len(messages) == 0 {
		return nil, errors.New("cannot find message by hash")
	}
	return messages, nil
}
// calculateHash returns the content hash of a message: the base64 (standard
// encoding) form of the SHA-256 digest of the sender's PeerID immediately
// followed by the message body.
//
// Content hashes must be reproducible across timeline views so that different
// participants derive the same hash for the same message. Timestamps from
// peers or groups are therefore deliberately excluded, as they are mostly
// fuzzy.
//
// As a reminder: for p2p messages PeerID is authenticated by the initial 3DH
// handshake; for groups each message is signed by the sender, and this
// signature is checked prior to inclusion in the timeline.
//
// The hash is not unique per message: the same peer sending the same body more
// than once yields the same hash. The timeline's hash index records the local
// index of every such message under that one hash.
func (t *Timeline) calculateHash(message Message) string {
	digest := sha256.Sum256([]byte(message.PeerID + message.Message))
	return base64.StdEncoding.EncodeToString(digest[:])
}
// Len gets the length of the timeline
func (t *Timeline) Len() int {
return len(t.Messages)
@ -130,13 +187,20 @@ func (t *Timeline) Insert(mi *Message) bool {
// assert timeline is initialized
t.init()
// check that we haven't seen this message before (this has no impact on p2p messages, but is essential for
// group messages)
_, exists := t.signatureCache[base64.StdEncoding.EncodeToString(mi.Signature)]
if exists {
return true
}
// update the message store
t.Messages = append(t.Messages, *mi)
// add to signature cache for fast checking of group messages...
t.signatureCache[base64.StdEncoding.EncodeToString(mi.Signature)] = true
// content based addressing index
contentHash := t.calculateHash(*mi)
t.hashCache[contentHash] = append(t.hashCache[contentHash], len(t.Messages)-1)
return false
}
@ -145,4 +209,8 @@ func (t *Timeline) init() {
if t.signatureCache == nil {
t.signatureCache = make(map[string]bool)
}
if t.hashCache == nil {
t.hashCache = make(map[string][]int)
}
}

View File

@ -100,4 +100,28 @@ func TestTranscriptConsistency(t *testing.T) {
t.Logf("Messages %v: %v %x %x", i, m.Message, m.Signature, m.PreviousMessageSig)
}
// Test message by hash lookup...
hash := timeline.calculateHash(*m5)
t.Logf("Looking up %v ", hash)
for key,msgs := range timeline.hashCache {
t.Logf("%v %v", key, msgs)
}
// check a real message..
msgs, err := timeline.GetMessagesByHash(hash)
if err != nil || len(msgs) != 1 {
t.Fatalf("looking up message by hash %v should have not errored: %v", hash, err)
} else if msgs[0].Message.Message != m5.Message {
t.Fatalf("%v != %v", msgs[0].Message, m5.Message)
}
// Check a non existed hash... error if there is no error
_, err = timeline.GetMessagesByHash("not a real hash")
if err == nil {
t.Fatalf("looking up message by hash %v should have errored: %v", hash, err)
}
}