From e6c3f581acc5c0249e634b25caa988f67a81db7e Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Mon, 3 Jan 2022 14:33:07 +0100 Subject: [PATCH 01/26] Outputs process-open errors to the console fixes #20 --- app/scan.go | 4 ++++ procio/process_linux.go | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/app/scan.go b/app/scan.go index d4dc39e..f25926a 100644 --- a/app/scan.go +++ b/app/scan.go @@ -12,6 +12,8 @@ import ( "strconv" "time" + "github.com/fatih/color" + "github.com/fkie-cad/yapscan/system" "github.com/fkie-cad/yapscan" @@ -270,12 +272,14 @@ func scan(c *cli.Context) error { for _, pid := range pids { func() { if pid == os.Getpid() { + color.Yellow("\nWARN: PID %d is the yapscan process, skipping!", pid) // Don't scan yourself as that will cause unwanted matches. return } proc, err := procio.OpenProcess(pid) if err != nil { + color.Red("\nERROR: Could not open process %d for scanning, reason: %v!", pid, err) logrus.WithError(err).Errorf("could not open process %d for scanning", pid) return } diff --git a/procio/process_linux.go b/procio/process_linux.go index 42c2dd2..680ca28 100644 --- a/procio/process_linux.go +++ b/procio/process_linux.go @@ -60,6 +60,16 @@ func GetRunningPIDs() ([]int, error) { } func open(pid int) (Process, error) { + _, err := os.Stat(fmt.Sprintf("/proc/%d", pid)) + if os.IsNotExist(err) { + return nil, fmt.Errorf("process does not exist") + } + if os.IsPermission(err) { + return nil, fmt.Errorf("insufficient permissions") + } + if err != nil { + return nil, fmt.Errorf("unexpected error: %w", err) + } return &processLinux{pid: pid}, nil } From a80f01d8e6c1aad5d5d1b94ca4979ca42c0861d5 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Mon, 3 Jan 2022 16:15:52 +0100 Subject: [PATCH 02/26] Sets JSON datetime format to RFC3339+microSeconds This allows for much easier parsing in python and microsecond precision is sufficient for the usecase. 
fixes #19 --- report/time.go | 29 +++++++++++++++++++++++++++++ yara.go | 23 ++++++++++++----------- 2 files changed, 41 insertions(+), 11 deletions(-) create mode 100644 report/time.go diff --git a/report/time.go b/report/time.go new file mode 100644 index 0000000..4a0a59b --- /dev/null +++ b/report/time.go @@ -0,0 +1,29 @@ +package report + +import ( + "time" +) + +const Format = "2006-01-02T15:04:05.000000Z07:00" + +type Time struct { + time.Time +} + +func Now() Time { + return Time{time.Now()} +} + +func (t Time) MarshalJSON() ([]byte, error) { + b := make([]byte, 0, len(Format)+2) + b = append(b, '"') + b = t.AppendFormat(b, Format) + b = append(b, '"') + return b, nil +} + +func (t *Time) UnmarshalJSON(b []byte) error { + tmp, err := time.Parse(`"`+Format+`"`, string(b)) + t.Time = tmp + return err +} diff --git a/yara.go b/yara.go index d522a96..215233a 100644 --- a/yara.go +++ b/yara.go @@ -12,6 +12,7 @@ import ( "sync" "time" + "github.com/fkie-cad/yapscan/report" "github.com/fkie-cad/yapscan/system" "github.com/yeka/zip" @@ -34,18 +35,18 @@ var YaraRulesFileExtensions = []string{ } type ProfilingInformation struct { - Time time.Time `json:"time"` - FreeRAM uintptr `json:"freeRAM"` - FreeSwap uintptr `json:"freeSwap"` - LoadAvgOneMinute float64 `json:"loadAvgOneMinute"` - LoadAvgFiveMinutes float64 `json:"loadAvgFiveMinutes"` - LoadAvgFifteenMinutes float64 `json:"loadAvgFifteenMinutes"` + Time report.Time `json:"time"` + FreeRAM uintptr `json:"freeRAM"` + FreeSwap uintptr `json:"freeSwap"` + LoadAvgOneMinute float64 `json:"loadAvgOneMinute"` + LoadAvgFiveMinutes float64 `json:"loadAvgFiveMinutes"` + LoadAvgFifteenMinutes float64 `json:"loadAvgFifteenMinutes"` } // ScanningStatistics holds statistic information about a scan. 
type ScanningStatistics struct { - Start time.Time `json:"start"` - End time.Time `json:"end"` + Start report.Time `json:"start"` + End report.Time `json:"end"` NumberOfProcessesScanned uint64 `json:"numberOfProcessesScanned"` NumberOfSegmentsScanned uint64 `json:"numberOfSegmentsScanned"` NumberOfMemoryBytesScanned uint64 `json:"numberOfMemoryBytesScanned"` @@ -61,7 +62,7 @@ type ScanningStatistics struct { func NewScanningStatistics() *ScanningStatistics { return &ScanningStatistics{ - Start: time.Now(), + Start: report.Now(), mux: &sync.Mutex{}, } } @@ -101,7 +102,7 @@ func (s *ScanningStatistics) StartProfiler(ctx context.Context, scanInterval tim "freeSwap": freeSwap, }).Trace("Memory profile.") s.ProfilingInformation = append(s.ProfilingInformation, &ProfilingInformation{ - Time: time.Now(), + Time: report.Now(), FreeRAM: freeRAM, FreeSwap: freeSwap, LoadAvgOneMinute: loadAvg1, @@ -153,7 +154,7 @@ func (s *ScanningStatistics) Finalize() { <-s.profilerDone } - s.End = time.Now() + s.End = report.Now() } // YaraScanner is a wrapper for yara.Rules, with a more go-like interface. 
From ee510ced0b3bbeefb80187cfd0d7de751a02250d Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Mon, 3 Jan 2022 16:45:56 +0100 Subject: [PATCH 03/26] Extracts version information --- app/app.go | 4 +++- version/version.go | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 version/version.go diff --git a/app/app.go b/app/app.go index 82dab4e..cb573da 100644 --- a/app/app.go +++ b/app/app.go @@ -6,6 +6,8 @@ import ( "runtime" "strings" + "github.com/fkie-cad/yapscan/version" + "github.com/fkie-cad/yapscan" "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" @@ -207,7 +209,7 @@ func MakeApp(args []string) *cli.App { Name: "yapscan", HelpName: "yapscan", Description: "A yara based scanner for files and process memory with some extras.", - Version: "0.12.0", + Version: version.YapscanVersion.String(), Writer: os.Stdout, ErrWriter: os.Stderr, Authors: []*cli.Author{ diff --git a/version/version.go b/version/version.go new file mode 100644 index 0000000..44192a0 --- /dev/null +++ b/version/version.go @@ -0,0 +1,23 @@ +package version + +import "fmt" + +var YapscanVersion = Version{ + Major: 0, + Minor: 12, + Bugfix: 0, +} + +type Version struct { + Major int + Minor int + Bugfix int +} + +func (v Version) String() string { + return fmt.Sprintf("%d.%d.%d", v.Major, v.Minor, v.Bugfix) +} + +func (v Version) MarshalJSON() ([]byte, error) { + return []byte(v.String()), nil +} From a472a539e3bc2bfe35ea19ac20bd2a99bd241206 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Tue, 4 Jan 2022 13:55:53 +0100 Subject: [PATCH 04/26] Fixes version marshaling --- version/version.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index 44192a0..590683d 100644 --- a/version/version.go +++ b/version/version.go @@ -19,5 +19,10 @@ func (v Version) String() string { } func (v Version) MarshalJSON() ([]byte, error) { - return []byte(v.String()), nil + s := v.String() + b := 
make([]byte, 0, len(s)+2) + b = append(b, '"') + b = append(b, s...) + b = append(b, '"') + return b, nil } From b27837959a4bd1e31351b470cf83e0467e9888a4 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Tue, 4 Jan 2022 13:56:59 +0100 Subject: [PATCH 05/26] Adds meta.json to report --- app/scan.go | 8 ++++- output/analysisReporter.go | 45 ++++++++++++------------- output/factory.go | 10 ++++-- report/meta.go | 67 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 27 deletions(-) create mode 100644 report/meta.go diff --git a/app/scan.go b/app/scan.go index f25926a..e28c39b 100644 --- a/app/scan.go +++ b/app/scan.go @@ -220,11 +220,17 @@ func scan(c *cli.Context) error { fmt.Printf("Dumps will be written to \"%s\".\n", dumpArchivePath) } + + tmpReporter, err := repFac.Build() + if err != nil { + return err + } + reporter = &output.MultiReporter{ Reporters: []output.Reporter{ reporter, &output.FilteringReporter{ - Reporter: repFac.Build(), + Reporter: tmpReporter, Filter: analysisFilter, }, }, diff --git a/output/analysisReporter.go b/output/analysisReporter.go index 49a8b9c..9ee46bc 100644 --- a/output/analysisReporter.go +++ b/output/analysisReporter.go @@ -8,30 +8,13 @@ import ( "github.com/fkie-cad/yapscan" "github.com/fkie-cad/yapscan/fileio" "github.com/fkie-cad/yapscan/procio" + "github.com/fkie-cad/yapscan/report" "github.com/fkie-cad/yapscan/system" "github.com/hillu/go-yara/v4" "github.com/sirupsen/logrus" "github.com/targodan/go-errors" ) -// SystemInfoFileName is the name of the file, where system info is stored. -const SystemInfoFileName = "systeminfo.json" - -// RulesFileName is the name of the file, where the used rules will be stored. -const RulesFileName = "rules.yarc" - -// ProcessFileName is the name of the file used to report information about processes. -const ProcessFileName = "processes.json" - -// MemoryProgressFileName is the name of the file used to report information about memory scans. 
-const MemoryProgressFileName = "memory-scans.json" - -// FSProgressFileName is the name of the file used to report information about file scans. -const FSProgressFileName = "file-scans.json" - -// ScanningStatisticsFileName is the name of the file used to report scanning. -const ScanningStatisticsFileName = "stats.json" - // AnalysisReporter implements a Reporter, which is // specifically intended for later analysis of the report // in order to determine rule quality. @@ -45,11 +28,25 @@ type AnalysisReporter struct { processInfos map[int]*procio.ProcessInfo } +func (r *AnalysisReporter) reportMeta() error { + w, err := r.archiver.Create(r.filenamePrefix + report.MetaFileName) + if err != nil { + return err + } + + err = json.NewEncoder(w).Encode(report.GetMetaInformation()) + if err != nil { + return errors.NewMultiError(err, w.Close()) + } + + return w.Close() +} + // ReportSystemInfo reports info about the running system. // This function may only called once, otherwise the behaviour depends on the // used Archiver. func (r *AnalysisReporter) ReportSystemInfo(info *system.Info) error { - w, err := r.archiver.Create(r.filenamePrefix + SystemInfoFileName) + w, err := r.archiver.Create(r.filenamePrefix + report.SystemInfoFileName) if err != nil { return err } @@ -66,7 +63,7 @@ func (r *AnalysisReporter) ReportSystemInfo(info *system.Info) error { // This function may only called once, otherwise the behaviour depends on the // used Archiver. func (r *AnalysisReporter) ReportScanningStatistics(stats *yapscan.ScanningStatistics) error { - w, err := r.archiver.Create(r.filenamePrefix + ScanningStatisticsFileName) + w, err := r.archiver.Create(r.filenamePrefix + report.ScanningStatisticsFileName) if err != nil { return err } @@ -83,7 +80,7 @@ func (r *AnalysisReporter) ReportScanningStatistics(stats *yapscan.ScanningStati // This function may only called once, otherwise the behaviour depends on the // used Archiver. 
func (r *AnalysisReporter) ReportRules(rules *yara.Rules) error { - w, err := r.archiver.Create(r.filenamePrefix + RulesFileName) + w, err := r.archiver.Create(r.filenamePrefix + report.RulesFileName) if err != nil { return err } @@ -97,7 +94,7 @@ func (r *AnalysisReporter) ReportRules(rules *yara.Rules) error { } func (r *AnalysisReporter) reportProcessInfos() error { - w, err := r.archiver.Create(r.filenamePrefix + ProcessFileName) + w, err := r.archiver.Create(r.filenamePrefix + report.ProcessFileName) if err != nil { return err } @@ -123,7 +120,7 @@ func (r *AnalysisReporter) reportProcessInfos() error { // This function may only called once, otherwise the behaviour depends on the // used Archiver. func (r *AnalysisReporter) ConsumeMemoryScanProgress(progress <-chan *yapscan.MemoryScanProgress) error { - w, err := r.archiver.Create(r.filenamePrefix + MemoryProgressFileName) + w, err := r.archiver.Create(r.filenamePrefix + report.MemoryProgressFileName) if err != nil { return err } @@ -187,7 +184,7 @@ func (r *AnalysisReporter) ConsumeMemoryScanProgress(progress <-chan *yapscan.Me // This function may only called once, otherwise the behaviour depends on the // used Archiver. 
func (r *AnalysisReporter) ConsumeFSScanProgress(progress <-chan *fileio.FSScanProgress) error { - w, err := r.archiver.Create(r.filenamePrefix + FSProgressFileName) + w, err := r.archiver.Create(r.filenamePrefix + report.FSProgressFileName) if err != nil { return err } diff --git a/output/factory.go b/output/factory.go index 904a1b0..c64b0c7 100644 --- a/output/factory.go +++ b/output/factory.go @@ -1,5 +1,7 @@ package output +import "github.com/targodan/go-errors" + type AnalysisReporterFactory struct { reporter *AnalysisReporter } @@ -27,6 +29,10 @@ func (f *AnalysisReporterFactory) WithFilenamePrefix(prefix string) *AnalysisRep return f } -func (f *AnalysisReporterFactory) Build() *AnalysisReporter { - return f.reporter +func (f *AnalysisReporterFactory) Build() (*AnalysisReporter, error) { + err := f.reporter.reportMeta() + if err != nil { + return nil, errors.NewMultiError(err, f.reporter.Close()) + } + return f.reporter, nil } diff --git a/report/meta.go b/report/meta.go new file mode 100644 index 0000000..086272e --- /dev/null +++ b/report/meta.go @@ -0,0 +1,67 @@ +package report + +import ( + "fmt" + "strings" + + "github.com/fkie-cad/yapscan/version" +) + +// SystemInfoFileName is the name of the file, where system info is stored. +const SystemInfoFileName = "systeminfo.json" + +// RulesFileName is the name of the file, where the used rules will be stored. +const RulesFileName = "rules.yarc" + +// ProcessFileName is the name of the file used to report information about processes. +const ProcessFileName = "processes.json" + +// MemoryProgressFileName is the name of the file used to report information about memory scans. +const MemoryProgressFileName = "memory-scans.json" + +// FSProgressFileName is the name of the file used to report information about file scans. +const FSProgressFileName = "file-scans.json" + +// ScanningStatisticsFileName is the name of the file used to report scanning. 
+const ScanningStatisticsFileName = "stats.json" + +// MetaFileName is the name of the file containing meta information about the report format. +const MetaFileName = "meta.json" + +var FormatVersion = version.Version{ + Major: 1, + Minor: 0, + Bugfix: 0, +} +var schemaURLBase = "https://yapscan.targodan.de/reportFormat/%s/%s" + +type MetaInformation struct { + YapscanVersion version.Version `json:"yapscanVersion"` + FormatVersion version.Version `json:"formatVersion"` + SchemaURLs map[string]string `json:"schemaURLs"` +} + +func generateSchemaURLs(files []string) map[string]string { + ret := make(map[string]string) + for _, file := range files { + fileParts := strings.Split(file, ".") + schemaFile := strings.Join(fileParts[0:len(fileParts)-1], ".") + ".schema." + fileParts[len(fileParts)-1] + ret[file] = fmt.Sprintf(schemaURLBase, FormatVersion, schemaFile) + } + return ret +} + +func GetMetaInformation() *MetaInformation { + return &MetaInformation{ + YapscanVersion: version.YapscanVersion, + FormatVersion: FormatVersion, + SchemaURLs: generateSchemaURLs([]string{ + SystemInfoFileName, + ProcessFileName, + MemoryProgressFileName, + FSProgressFileName, + ScanningStatisticsFileName, + MetaFileName, + }), + } +} From 2e1bfb34397285531b7f3fad0e74e8b63fada5ed Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Tue, 4 Jan 2022 13:58:48 +0100 Subject: [PATCH 06/26] Makes enums lowercase --- app/filter.go | 4 ++-- arch/bitness.go | 2 +- arch/bitness_enum.go | 2 +- procio/crash.go | 2 +- procio/crash_enum.go | 2 +- procio/memory.go | 22 +++++++++++----------- procio/memory_enum.go | 4 ++-- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/app/filter.go b/app/filter.go index 304e464..032eadd 100644 --- a/app/filter.go +++ b/app/filter.go @@ -60,7 +60,7 @@ func BuildFilterType(fStr []string) (yapscan.MemorySegmentFilter, error) { if s == "" { continue } - types[i], err = procio.ParseSegmentType(strings.ToUpper(s[0:1]) + strings.ToLower(s[1:])) + types[i], err = 
procio.ParseSegmentType(s) if err != nil { return nil, fmt.Errorf("could not parse type \"%s\", reason: %w", s, err) } @@ -81,7 +81,7 @@ func BuildFilterState(fStr []string) (yapscan.MemorySegmentFilter, error) { if s == "" { continue } - states[i], err = procio.ParseState(strings.ToUpper(s[0:1]) + strings.ToLower(s[1:])) + states[i], err = procio.ParseState(s) if err != nil { return nil, fmt.Errorf("could not parse state \"%s\", reason: %w", s, err) } diff --git a/arch/bitness.go b/arch/bitness.go index 50880e5..0cb4666 100644 --- a/arch/bitness.go +++ b/arch/bitness.go @@ -4,7 +4,7 @@ package arch // Bitness describes the bitness of an architecture. /* ENUM( -Invalid +invalid 32Bit = 32 64Bit = 64 ) diff --git a/arch/bitness_enum.go b/arch/bitness_enum.go index 1cd7355..7d7a2bb 100644 --- a/arch/bitness_enum.go +++ b/arch/bitness_enum.go @@ -20,7 +20,7 @@ const ( Bitness64Bit Bitness = iota + 62 ) -const _BitnessName = "Invalid32Bit64Bit" +const _BitnessName = "invalid32Bit64Bit" var _BitnessNames = []string{ _BitnessName[0:7], diff --git a/procio/crash.go b/procio/crash.go index a682202..09a2aa5 100644 --- a/procio/crash.go +++ b/procio/crash.go @@ -4,7 +4,7 @@ package procio // CrashMethod selects a method to crash a process. /* ENUM( -CreateThreadOnNull +createThreadOnNull ) */ type CrashMethod int diff --git a/procio/crash_enum.go b/procio/crash_enum.go index 88a64ba..8a43c15 100644 --- a/procio/crash_enum.go +++ b/procio/crash_enum.go @@ -16,7 +16,7 @@ const ( CrashMethodCreateThreadOnNull CrashMethod = iota ) -const _CrashMethodName = "CreateThreadOnNull" +const _CrashMethodName = "createThreadOnNull" var _CrashMethodNames = []string{ _CrashMethodName[0:18], diff --git a/procio/memory.go b/procio/memory.go index 3072345..d52a448 100644 --- a/procio/memory.go +++ b/procio/memory.go @@ -90,13 +90,13 @@ func (s *MemorySegmentInfo) CopyWithoutSubSegments() *MemorySegmentInfo { // Permissions describes the permissions of a memory segment. 
type Permissions struct { // Is read-only access allowed - Read bool `yaml:"read"` + Read bool `json:"read"` // Is write access allowed (also true if COW is enabled) - Write bool `yaml:"write"` + Write bool `json:"write"` // Is copy-on-write access allowed (if this is true, then so is Write) - COW bool `yaml:"cow"` + COW bool `json:"COW"` // Is execute access allowed - Execute bool `yaml:"execute"` + Execute bool `json:"execute"` } // PermR is readonly Permissions. @@ -223,9 +223,9 @@ func (p Permissions) String() string { // State represents the state of a memory segment. /* ENUM( -Commit -Free -Reserve +commit +free +reserve ) */ type State int @@ -233,10 +233,10 @@ type State int // SegmentType represents the type of a memory segment. /* ENUM( -Image -Mapped -Private -PrivateMapped +image +mapped +private +privateMapped ) */ type SegmentType int diff --git a/procio/memory_enum.go b/procio/memory_enum.go index a0a2b0e..aedceef 100644 --- a/procio/memory_enum.go +++ b/procio/memory_enum.go @@ -22,7 +22,7 @@ const ( SegmentTypePrivateMapped ) -const _SegmentTypeName = "ImageMappedPrivatePrivateMapped" +const _SegmentTypeName = "imagemappedprivateprivateMapped" var _SegmentTypeNames = []string{ _SegmentTypeName[0:5], @@ -97,7 +97,7 @@ const ( StateReserve ) -const _StateName = "CommitFreeReserve" +const _StateName = "commitfreereserve" var _StateNames = []string{ _StateName[0:6], From 7a6c619c35b4a3869d674ca5ee0e00c9039afbb8 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Tue, 4 Jan 2022 14:14:52 +0100 Subject: [PATCH 07/26] Updates mocks --- generate.sh | 12 ++---------- mock_MemoryScanner_test.go | 2 +- mock_MemorySegmentFilterFunc_test.go | 2 +- mock_MemorySegmentFilter_test.go | 2 +- mock_Rules_test.go | 2 +- mock_memoryReaderFactory_test.go | 2 +- mock_memoryReader_test.go | 2 +- mock_process_test.go | 2 +- mock_segmentScanner_test.go | 2 +- 9 files changed, 10 insertions(+), 18 deletions(-) diff --git a/generate.sh b/generate.sh index 0040b3d..d9d10f3 
100755 --- a/generate.sh +++ b/generate.sh @@ -2,19 +2,11 @@ cd $(dirname "$0") || exit 1 -update="" if [[ "$1" == "-u" ]]; then - update="-u" + go install github.com/abice/go-enum@latest + go install github.com/vektra/mockery/v2@latest fi -go mod tidy -go mod vendor - -go get -v $update github.com/abice/go-enum -go get -v $update github.com/vektra/mockery/v2/.../ -go mod tidy - find . -name 'mock_*_test.go' -type f -delete go generate ./... - diff --git a/mock_MemoryScanner_test.go b/mock_MemoryScanner_test.go index e34d4e8..1eb4ba5 100644 --- a/mock_MemoryScanner_test.go +++ b/mock_MemoryScanner_test.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.9.4. DO NOT EDIT. package yapscan diff --git a/mock_MemorySegmentFilterFunc_test.go b/mock_MemorySegmentFilterFunc_test.go index 2d730cb..30a67d9 100644 --- a/mock_MemorySegmentFilterFunc_test.go +++ b/mock_MemorySegmentFilterFunc_test.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.9.4. DO NOT EDIT. package yapscan diff --git a/mock_MemorySegmentFilter_test.go b/mock_MemorySegmentFilter_test.go index 2df0b4b..ae2aeda 100644 --- a/mock_MemorySegmentFilter_test.go +++ b/mock_MemorySegmentFilter_test.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.9.4. DO NOT EDIT. package yapscan diff --git a/mock_Rules_test.go b/mock_Rules_test.go index a9a6965..20a910a 100644 --- a/mock_Rules_test.go +++ b/mock_Rules_test.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.9.4. DO NOT EDIT. package yapscan diff --git a/mock_memoryReaderFactory_test.go b/mock_memoryReaderFactory_test.go index 9713969..d7ec90c 100644 --- a/mock_memoryReaderFactory_test.go +++ b/mock_memoryReaderFactory_test.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.9.4. 
DO NOT EDIT. package yapscan diff --git a/mock_memoryReader_test.go b/mock_memoryReader_test.go index 0c65706..0d51c3a 100644 --- a/mock_memoryReader_test.go +++ b/mock_memoryReader_test.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.9.4. DO NOT EDIT. package yapscan diff --git a/mock_process_test.go b/mock_process_test.go index b544114..ee27a9d 100644 --- a/mock_process_test.go +++ b/mock_process_test.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.9.4. DO NOT EDIT. package yapscan diff --git a/mock_segmentScanner_test.go b/mock_segmentScanner_test.go index 66963fd..076513a 100644 --- a/mock_segmentScanner_test.go +++ b/mock_segmentScanner_test.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.9.4. DO NOT EDIT. package yapscan From 059b352895a768b4f9455f424a33353369d3042f Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Tue, 4 Jan 2022 14:17:05 +0100 Subject: [PATCH 08/26] Updates dependencies --- go.mod | 6 +++--- go.sum | 13 ++++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index 0d43bfa..dac8132 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/hillu/go-yara/v4 v4.1.0 github.com/klauspost/compress v1.13.6 github.com/kr/text v0.2.0 // indirect - github.com/mattn/go-colorable v0.1.11 // indirect + github.com/mattn/go-colorable v0.1.12 // indirect github.com/sirupsen/logrus v1.8.1 github.com/smartystreets/assertions v1.2.0 // indirect github.com/smartystreets/goconvey v1.6.4 @@ -20,8 +20,8 @@ require ( github.com/targodan/go-errors v1.0.0 github.com/urfave/cli/v2 v2.3.0 github.com/yeka/zip v0.0.0-20180914125537-d046722c6feb - golang.org/x/crypto v0.0.0-20211115234514-b4de73f9ece8 - golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c + golang.org/x/crypto v0.0.0-20211215165025-cf75a172585e + golang.org/x/sys 
v0.0.0-20211216021012-1d35b9e2eb4e gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect ) diff --git a/go.sum b/go.sum index e5682d6..7a6b183 100644 --- a/go.sum +++ b/go.sum @@ -128,8 +128,8 @@ github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= -github.com/mattn/go-colorable v0.1.11 h1:nQ+aFkoE2TMGc0b68U2OKSexC+eq46+XwZzWXHRmPYs= -github.com/mattn/go-colorable v0.1.11/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= +github.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40= +github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= @@ -225,8 +225,8 @@ golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= -golang.org/x/crypto v0.0.0-20211115234514-b4de73f9ece8 h1:5QRxNnVsaJP6NAse0UdkRgL3zHMvCRRkrDVLNdNpdy4= -golang.org/x/crypto v0.0.0-20211115234514-b4de73f9ece8/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.0.0-20211215165025-cf75a172585e 
h1:1SzTfNOXwIS2oWiMF+6qu0OUDKb0dauo6MoDUQyu+yU= +golang.org/x/crypto v0.0.0-20211215165025-cf75a172585e/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -292,11 +292,10 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c h1:DHcbWVXeY+0Y8HHKR+rbLwnoh2F4tNCY7rTiHJ30RmA= -golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e h1:fLOSk5Q00efkSvAm+4xcoXD+RRmLmmulPn5I3Y9F2EM= +golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= From 938c8218b54974483211194e1183326e0133b8a2 Mon Sep 17 
00:00:00 2001 From: Luca Corbatto Date: Tue, 4 Jan 2022 14:17:31 +0100 Subject: [PATCH 09/26] Adds report format definition --- report/README.md | 79 +++++++++++++++++++++++++++ report/file-scans.schema.json | 40 ++++++++++++++ report/match.schema.json | 40 ++++++++++++++ report/memory-scans.schema.json | 30 ++++++++++ report/meta.schema.json | 27 +++++++++ report/permissions.schema.json | 27 +++++++++ report/processes.schema.json | 97 +++++++++++++++++++++++++++++++++ report/stats.schema.json | 77 ++++++++++++++++++++++++++ report/systeminfo.schema.json | 50 +++++++++++++++++ 9 files changed, 467 insertions(+) create mode 100644 report/README.md create mode 100644 report/file-scans.schema.json create mode 100644 report/match.schema.json create mode 100644 report/memory-scans.schema.json create mode 100644 report/meta.schema.json create mode 100644 report/permissions.schema.json create mode 100644 report/processes.schema.json create mode 100644 report/stats.schema.json create mode 100644 report/systeminfo.schema.json diff --git a/report/README.md b/report/README.md new file mode 100644 index 0000000..e12469a --- /dev/null +++ b/report/README.md @@ -0,0 +1,79 @@ +# Yapscan Report Format + +The Yapscan report format is versioned independently of the Yapscan executable. +Its versioning is inspired by semantic versioning of the form `MAJOR.MINOR.BUGFIX`. +Changes to the different parts of the versioning promise different compatibility. + +- **MAJOR-Update:** + These updates would not promise any backwards or forwards compatibility. + Parsers might require close to a complete rewrite. + Switching from JSON to e.g. YAML would change the major version. +- **MINOR-Update:** + These updates promise backwards compatibility, with only small efforts on the parser implementation. + Renaming or deletion of new fields would lead to a MINOR-Update. + Also changes to the internal format of a field are allowed. 
+ Addition or renaming of certain files in the container format, or changing the container format would result in a MINOR-Update. +- **BUGFIX-Update:** + These updates promise forward compatibility with no effort of the parser implementation and backwards compatibility with small efforts of the parser implementation. + Addition of fields would lead to a BUGFIX-Update. + If validation with the schemas is done, the schema URL might need updating. + Support for the new fields should be added, but the parser shouldn't break if you don't do this. + Any parser supporting version `n.m.i` should also work for any version `n.m.j`. + +## Container Format + +The container format is [TAR](https://en.wikipedia.org/wiki/Tar_(computing)) with [ZSTD](https://github.com/facebook/zstd) compression and optional [OpenPGP](https://www.openpgp.org/) encryption. +The encryption may be symmetric or asymmetric. + +A change to the container or encryption format would require a bump to the MAJOR-Version. + +This container contains a number of JSON-Files. +The format of each of these files is defined as JSON-Schema. +Note that the schemas in general are rather strict and do not reflect the compatibility promises from above. +This is done on purpose to have a more meaningful format-definition. +For actual validation, the schemas defined in the `meta.json` should be used (see below). +The only exception from this is the `meta.schema.json`, which is more lax to allow for early validation of the meta-file. + +### meta.json + +This file contains meta information about the report. +The `meta.json` has stricter promises regarding compatibility than the other files, as it is essential for parser implementations. +The `meta.json` will validate correctly against the [meta.schema.json of version 1.0.0](https://yapscan.targodan.de/reportFormat/1.0.0/meta.schema.json) for **any update except a MAJOR-Update**. 
+This means only the addition of fields to this file is allowed, not removal, renaming or changing of contents. + +Latest Schema: [meta.schema.json](https://yapscan.targodan.de/reportFormat/latest/meta.schema.json) + +### stats.json + +This file contains statistic information about the scan. + +Latest Schema: [stats.schema.json](https://yapscan.targodan.de/reportFormat/latest/stats.schema.json) + +### systeminfo.json + +This file contains information about the scanned system. + +Latest Schema: [systeminfo.schema.json](https://yapscan.targodan.de/reportFormat/latest/systeminfo.schema.json) + +### processes.json + +This file contains information about the scanned processes and their memory layouts. +There is one JSON-Object per line in this file (splitting on `'\n'` is safe). + +Latest Schema: [processes.schema.json](https://yapscan.targodan.de/reportFormat/latest/processes.schema.json) + +### memory-scans.json + +This file contains information about the scanned memory segments and any related yara rule matches. +There is one JSON-Object per line in this file (splitting on `'\n'` is safe). +It may be omitted if no memory was scanned. + +Latest Schema: [memory-scans.schema.json](https://yapscan.targodan.de/reportFormat/latest/memory-scans.schema.json) + +### file-scans.json + +This file contains information about the scanned files and any related yara rule matches. +There is one JSON-Object per line in this file (splitting on `'\n'` is safe). +It may be omitted if no files were scanned. 
+ +Latest Schema: [file-scans.schema.json](https://yapscan.targodan.de/reportFormat/latest/file-scans.schema.json) diff --git a/report/file-scans.schema.json b/report/file-scans.schema.json new file mode 100644 index 0000000..575d775 --- /dev/null +++ b/report/file-scans.schema.json @@ -0,0 +1,40 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/match.schema.json", + "title": "file-scans", + "description": "Scan results of file-scans. For each scanned file, where either a match was found or an error was emitted, one JSON object per line is stored.", + "type": "object", + "properties": { + "file": { + "type": "object", + "description": "Information about the scanned file", + "properties": { + "SHA256": { + "type": "string", + "description": "SHA256 hexdigest of the executable file, if the file could be read" + }, + "path": { + "type": "string", + "description": "The path of the scanned file" + }, + "MD5": { + "type": "string", + "description": "MD5 hexdigest of the executable file, if the file could be read" + } + }, + "required": ["path"], + "additionalProperties": false + }, + "match": { + "type": "array", + "description": "Contains information about matched rules. Is empty-array if no rules matched.", + "items": { + "$ref": "https://yapscan.targodan.de/reportFormat/1.0.0/match.schema.json" + } + }, + "error": { + "type": ["string", "null"], + "description": "The error message or null if no error happened. Note, there may still be matches if an error happened." 
+ } + } +} \ No newline at end of file diff --git a/report/match.schema.json b/report/match.schema.json new file mode 100644 index 0000000..57675c0 --- /dev/null +++ b/report/match.schema.json @@ -0,0 +1,40 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/match.schema.json", + "title": "match", + "description": "Information about a yara rule match", + "type": "object", + "properties": { + "strings": { + "type": "array", + "description": "The exact strings of the yara rule, that were found, including their offsets in the memory segment.", + "items": { + "properties": { + "offset": { + "type": "number", + "description": "The offset, where the string was found, relative to the start of the scanned memory segment or file. Note, this value can get very large. make sure your parser uses an int64." + }, + "name": { + "type": "string", + "description": "The name of the matched string as defined in the yara rule" + }, + "base": { + "type": "number" + } + }, + "required": ["offset", "name", "base"], + "additionalProperties": false + } + }, + "namespace": { + "type": "string", + "description": "The namespace of the matched yara rule. This depends on how the rules where compiled." + }, + "rule": { + "type": "string", + "description": "The name of the matched yara rule" + } + }, + "required": ["strings", "namespace", "rule"], + "additionalProperties": false +} \ No newline at end of file diff --git a/report/memory-scans.schema.json b/report/memory-scans.schema.json new file mode 100644 index 0000000..7871016 --- /dev/null +++ b/report/memory-scans.schema.json @@ -0,0 +1,30 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/memory-scans.schema.json", + "title": "memory-scans", + "description": "Memory scan results. 
For each scanned memory section, where either a match was found or an error was emitted, one JSON object per line is stored.", + "type": "object", + "properties": { + "match": { + "type": "array", + "description": "Contains information about matched rules. Is empty-array if no rules matched.", + "items": { + "$ref": "https://yapscan.targodan.de/reportFormat/1.0.0/match.schema.json" + } + }, + "pid": { + "type": "integer", + "description": "PID of the scanned process" + }, + "error": { + "type": ["string", "null"], + "description": "The error message or null if no error happened. Note, there may still be matches if an error happened." + }, + "memorySegment": { + "type": "integer", + "description": "The base address of the scanned memory segment. This can be used to resolve the memory segment information in the processes.json. Note, this value can get very large. make sure your parser uses an int64." + } + }, + "required": ["match", "pid", "error", "memorySegment"], + "additionalProperties": false +} \ No newline at end of file diff --git a/report/meta.schema.json b/report/meta.schema.json new file mode 100644 index 0000000..5b7afd1 --- /dev/null +++ b/report/meta.schema.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/meta.schema.json", + "title": "meta", + "description": "Metainformation about the yapscan report.", + "type": "object", + "properties": { + "yapscanVersion": { + "type": "string", + "description": "Version of Yapscan, used to generate the report" + }, + "schemaURLs": { + "type": "object", + "description": "SchemaURLs for the files of the report. 
There is one schema link for each JSON file contained in the report.", + "patternProperties": { + "\\.json$": { + "type": "string" + } + } + }, + "formatVersion": { + "type": "string", + "description": "Version of the report format" + } + }, + "required": ["yapscanVersion", "schemaURLs", "formatVersion"] +} \ No newline at end of file diff --git a/report/permissions.schema.json b/report/permissions.schema.json new file mode 100644 index 0000000..32732d6 --- /dev/null +++ b/report/permissions.schema.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/permissions.schema.json", + "title": "permissions", + "description": "Permissions of a memory segment", + "type": "object", + "properties": { + "read": { + "type": "boolean", + "description": "True, if readable" + }, + "write": { + "type": "boolean", + "description": "True, if writable. If COW is true, this will be as well." + }, + "COW": { + "type": "boolean", + "description": "True, if the Copy-On-Write flag is set." 
+ }, + "execute": { + "type": "boolean", + "description": "True, if executable" + } + }, + "required": ["read", "write", "COW", "execute"], + "additionalProperties": false +} \ No newline at end of file diff --git a/report/processes.schema.json b/report/processes.schema.json new file mode 100644 index 0000000..e0b5870 --- /dev/null +++ b/report/processes.schema.json @@ -0,0 +1,97 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/processes.schema.json", + "title": "processes", + "description": "Information about running processes of a scanned system", + "type": "object", + "properties": { + "pid": { + "type": "integer", + "description": "PID of the process" + }, + "bitness": { + "enum": ["invalid", "64Bit", "32Bit"], + "description": "Bitness of the process" + }, + "executablePath": { + "type": ["string", "null"], + "description": "Path to the executable file of the process, if it could be determined" + }, + "executableMD5": { + "oneOf": [ + { + "type": "string", + "pattern": "^[a-f0-9]{32}$" + }, + { + "type": "null" + } + ], + "description": "MD5 hexdigest of the executable file, if the file could be read" + }, + "executableSHA256": { + "oneOf": [ + { + "type": "string", + "pattern": "^[a-f0-9]{64}$" + }, + { + "type": "null" + } + ], + "description": "SHA256 hexdigest of the executable file, if the file could be read" + }, + "username": { + "type": "string", + "description": "Name of the user, which the process is executed under" + }, + "memorySegments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "parentBaseAddress": { + "type": "integer", + "description": "Address of the parent segment. This is equal to baseAddress if the segment is a root segment. Note, this value can get very large. make sure your parser uses an int64." + }, + "baseAddress": { + "type": "integer", + "description": "Address of this segment. Note, this value can get very large. 
make sure your parser uses an int64." + }, + "allocatedPermissions": { + "$ref": "https://yapscan.targodan.de/reportFormat/1.0.0/permissions.schema.json", + "description": "The permissions, this segment was initialized with" + }, + "currentPermissions": { + "$ref": "https://yapscan.targodan.de/reportFormat/1.0.0/permissions.schema.json", + "description": "The permissions, this segment had during time of the scan" + }, + "size": { + "type": "integer", + "description": "Size of the segment in bytes. Note, this value can get very large. make sure your parser uses an int64." + }, + "rss": { + "type": "integer", + "description": "The resident set size (RSS) of the segment in bytes. Only applicable on linux. Note, this value can get very large. make sure your parser uses an int64." + }, + "state": { + "enum": ["commit", "free", "reserve"], + "description": "The state of the segment. Note that the state \"reserve\" is an approximation on linux; this will be set if the RSS is exactly zero." + }, + "type": { + "enum": ["image", "mapped", "private", "privateMapped"], + "description": "The type of the memory segment" + }, + "mappedFile": { + "type": ["string", "null"], + "description": "The path to the mapped file, if the memory segment has a backing-file" + } + }, + "required": ["parentBaseAddress", "baseAddress", "allocatedPermissions", "currentPermissions", "size", "rss", "state", "type", "mappedFile"], + "additionalProperties": false + } + } + }, + "required": ["pid", "bitness", "executablePath", "executableMD5", "executableSHA256", "username", "memorySegments"], + "additionalProperties": false +} diff --git a/report/stats.schema.json b/report/stats.schema.json new file mode 100644 index 0000000..cfb98e1 --- /dev/null +++ b/report/stats.schema.json @@ -0,0 +1,77 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/stats.schema.json", + "title": "stats", + "description": "Statistic information about the 
Yapscan run", + "type": "object", + "properties": { + "numberOfFileBytesScanned": { + "type": "integer", + "description": "Number of total bytes scanned from files. Note, this value can get very large. make sure your parser uses an int64." + }, + "start": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}[+-]\\d{2}:\\d{2}$", + "description": "Datetime of the start of the scan. Format is RFC3339 with added micro seconds." + }, + "profilingInformation": { + "type": "array", + "items": { + "properties": { + "freeRAM": { + "type": "integer", + "description": "The currently free RAM in bytes. Note, this value can get very large. make sure your parser uses an int64." + }, + "freeSwap": { + "type": "integer", + "description": "The currently free swap in bytes. Note, this value can get very large. make sure your parser uses an int64." + }, + "loadAvgFifteenMinutes": { + "type": "number", + "description": "The load average over the last fifteen minutes, normalized over the number of CPUs. Note, that on windows load checking start with the scan, thus this value will be inaccurate for the first 15 minutes of the scan.", + "minimum": 0.0, + "maximum": 1.0 + }, + "loadAvgOneMinute": { + "type": "number", + "description": "The load average over the last minute, normalized over the number of CPUs. Note, that on windows load checking start with the scan, thus this value will be inaccurate for the first minute of the scan.", + "minimum": 0.0, + "maximum": 1.0 + }, + "loadAvgFiveMinutes": { + "type": "number", + "description": "The load average over the last five minutes, normalized over the number of CPUs. Note, that on windows load checking start with the scan, thus this value will be inaccurate for the first 5 minutes of the scan.", + "minimum": 0.0, + "maximum": 1.0 + }, + "time": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}[+-]\\d{2}:\\d{2}$", + "description": "Datetime of the stats snapshot. 
Format is RFC3339 with added micro seconds." + } + } + } + }, + "end": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}[+-]\\d{2}:\\d{2}$", + "description": "Datetime of the end of the scan. Format is RFC3339 with added micro seconds." + }, + "numberOfProcessesScanned": { + "type": "number", + "description": "The number of scanned processes" + }, + "numberOfSegmentsScanned": { + "type": "number", + "description": "The number of scanned memory segments" + }, + "numberOfMemoryBytesScanned": { + "type": "number", + "description": "Number of total bytes scanned from memory. Note, this value can get very large. make sure your parser uses an int64." + }, + "numberOfFilesScanned": { + "type": "number", + "description": "The number of scanned files" + } + } +} \ No newline at end of file diff --git a/report/systeminfo.schema.json b/report/systeminfo.schema.json new file mode 100644 index 0000000..be42395 --- /dev/null +++ b/report/systeminfo.schema.json @@ -0,0 +1,50 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/systeminfo.schema.json", + "title": "Yapscan Systeminfo", + "description": "System information gathered by Yapscan", + "type": "object", + "properties": { + "totalSwap": { + "type": "integer", + "description": "Total swap capacity in bytes. Note, this value can get very large. make sure your parser uses an int64." 
+ }, + "hostname": { + "type": "string", + "description": "The hostname of the scanned system" + }, + "osVersion": { + "type": "string", + "description": "Operating system version" + }, + "numCPUs": { + "type": "integer", + "description": "Total number of CPUs/Cores" + }, + "osArch": { + "enum": ["invalid", "amd64", "i386"], + "description": "The operating system's architecture" + }, + "osName": { + "type": "string", + "description": "Name of the operating system" + }, + "osFlavour": { + "type": "string", + "description": "Flavour of the operating system" + }, + "ips": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of local IPs" + }, + "totalRAM": { + "type": "integer", + "description": "Total installed RAM in bytes. Note, this value can get very large. make sure your parser uses an int64." + } + }, + "required": ["totalSwap", "hostname", "osVersion", "numCPUs", "osArch", "osName", "osFlavour", "ips", "totalRAM"], + "additionalProperties": false +} \ No newline at end of file From 5b3730808aedca3e0bedefa5271f713ff4e5034c Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Tue, 4 Jan 2022 14:22:48 +0100 Subject: [PATCH 10/26] Moves format docs to versioned dirs --- report/latest | 1 + report/{ => v1.0.0}/README.md | 0 report/{ => v1.0.0}/file-scans.schema.json | 0 report/{ => v1.0.0}/match.schema.json | 0 report/{ => v1.0.0}/memory-scans.schema.json | 0 report/{ => v1.0.0}/meta.schema.json | 0 report/{ => v1.0.0}/permissions.schema.json | 0 report/{ => v1.0.0}/processes.schema.json | 0 report/{ => v1.0.0}/stats.schema.json | 0 report/{ => v1.0.0}/systeminfo.schema.json | 0 10 files changed, 1 insertion(+) create mode 120000 report/latest rename report/{ => v1.0.0}/README.md (100%) rename report/{ => v1.0.0}/file-scans.schema.json (100%) rename report/{ => v1.0.0}/match.schema.json (100%) rename report/{ => v1.0.0}/memory-scans.schema.json (100%) rename report/{ => v1.0.0}/meta.schema.json (100%) rename report/{ => 
v1.0.0}/permissions.schema.json (100%) rename report/{ => v1.0.0}/processes.schema.json (100%) rename report/{ => v1.0.0}/stats.schema.json (100%) rename report/{ => v1.0.0}/systeminfo.schema.json (100%) diff --git a/report/latest b/report/latest new file mode 120000 index 0000000..60453e6 --- /dev/null +++ b/report/latest @@ -0,0 +1 @@ +v1.0.0 \ No newline at end of file diff --git a/report/README.md b/report/v1.0.0/README.md similarity index 100% rename from report/README.md rename to report/v1.0.0/README.md diff --git a/report/file-scans.schema.json b/report/v1.0.0/file-scans.schema.json similarity index 100% rename from report/file-scans.schema.json rename to report/v1.0.0/file-scans.schema.json diff --git a/report/match.schema.json b/report/v1.0.0/match.schema.json similarity index 100% rename from report/match.schema.json rename to report/v1.0.0/match.schema.json diff --git a/report/memory-scans.schema.json b/report/v1.0.0/memory-scans.schema.json similarity index 100% rename from report/memory-scans.schema.json rename to report/v1.0.0/memory-scans.schema.json diff --git a/report/meta.schema.json b/report/v1.0.0/meta.schema.json similarity index 100% rename from report/meta.schema.json rename to report/v1.0.0/meta.schema.json diff --git a/report/permissions.schema.json b/report/v1.0.0/permissions.schema.json similarity index 100% rename from report/permissions.schema.json rename to report/v1.0.0/permissions.schema.json diff --git a/report/processes.schema.json b/report/v1.0.0/processes.schema.json similarity index 100% rename from report/processes.schema.json rename to report/v1.0.0/processes.schema.json diff --git a/report/stats.schema.json b/report/v1.0.0/stats.schema.json similarity index 100% rename from report/stats.schema.json rename to report/v1.0.0/stats.schema.json diff --git a/report/systeminfo.schema.json b/report/v1.0.0/systeminfo.schema.json similarity index 100% rename from report/systeminfo.schema.json rename to 
report/v1.0.0/systeminfo.schema.json From 0ff224b7b6d395f32f9d886e563d8ec579be0fec Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Tue, 4 Jan 2022 14:51:12 +0100 Subject: [PATCH 11/26] Adds version unmarshaling --- report/time.go | 10 +++++++++- version/version.go | 26 +++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/report/time.go b/report/time.go index 4a0a59b..4449acb 100644 --- a/report/time.go +++ b/report/time.go @@ -1,6 +1,8 @@ package report import ( + "encoding/json" + "fmt" "time" ) @@ -23,7 +25,13 @@ func (t Time) MarshalJSON() ([]byte, error) { } func (t *Time) UnmarshalJSON(b []byte) error { - tmp, err := time.Parse(`"`+Format+`"`, string(b)) + var s string + err := json.Unmarshal(b, &s) + if err != nil { + return fmt.Errorf("expected a JSON-string as Time, %w", err) + } + + tmp, err := time.Parse(Format, string(b)) t.Time = tmp return err } diff --git a/version/version.go b/version/version.go index 590683d..f8199e9 100644 --- a/version/version.go +++ b/version/version.go @@ -1,6 +1,11 @@ package version -import "fmt" +import ( + "encoding/json" + "fmt" + "strconv" + "strings" +) var YapscanVersion = Version{ Major: 0, @@ -26,3 +31,22 @@ func (v Version) MarshalJSON() ([]byte, error) { b = append(b, '"') return b, nil } + +func (v *Version) UnmarshalJSON(b []byte) error { + var s string + err := json.Unmarshal(b, &s) + if err != nil { + return fmt.Errorf("expected a JSON-string as Version, %w", err) + } + + parts := strings.Split(s, ".") + if len(parts) != 3 { + return fmt.Errorf("expected exactly 3 dot-separated parts as version string, got %d", len(parts)) + } + + v.Major, err = strconv.Atoi(parts[0]) + v.Minor, err = strconv.Atoi(parts[1]) + v.Bugfix, err = strconv.Atoi(parts[2]) + + return nil +} From 3291231cdc5fb15de2ddde59ebe29fc1510d2d11 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Tue, 4 Jan 2022 14:51:29 +0100 Subject: [PATCH 12/26] Extracts more of the report --- 
acceptanceTests/reports_test.go | 18 ++-- output/analysisReporter.go | 4 +- output/output.go | 42 ++-------- report/report.go | 140 ++++++++++++++++++++++++++++++++ 4 files changed, 158 insertions(+), 46 deletions(-) create mode 100644 report/report.go diff --git a/acceptanceTests/reports_test.go b/acceptanceTests/reports_test.go index 2760c82..f55e68d 100644 --- a/acceptanceTests/reports_test.go +++ b/acceptanceTests/reports_test.go @@ -15,13 +15,13 @@ import ( "testing" "testing/quick" + "github.com/fkie-cad/yapscan/report" + "github.com/fkie-cad/yapscan/procio" "github.com/fkie-cad/yapscan/system" "golang.org/x/crypto/openpgp" - "github.com/fkie-cad/yapscan/output" - "github.com/klauspost/compress/zstd" "github.com/fkie-cad/yapscan/app" @@ -534,13 +534,13 @@ func conveyReportHasMatch(c C, pid int, addressOfData uintptr, memoryScansJSON * foundCorrectMatch := false var err error for { - report := new(output.MemoryScanProgressReport) - err = dec.Decode(report) + re := new(report.MemoryScan) + err = dec.Decode(re) if err != nil { break } - if report.PID == pid && report.MemorySegment == addressOfData && len(report.Matches) > 0 { + if re.PID == pid && re.MemorySegment == addressOfData && len(re.Matches) > 0 { foundCorrectMatch = true } } @@ -556,15 +556,15 @@ func conveyReportDoesNotHaveMatch(c C, pid int, addressOfData uintptr, memorySca foundMatchForAddressInPID := false var err error for { - report := new(output.MemoryScanProgressReport) - err = dec.Decode(report) + re := new(report.MemoryScan) + err = dec.Decode(re) if err != nil { break } - if report.PID == pid && len(report.Matches) > 0 { + if re.PID == pid && len(re.Matches) > 0 { foundMatchForPID = true - if report.MemorySegment == addressOfData { + if re.MemorySegment == addressOfData { foundMatchForAddressInPID = true } } diff --git a/output/analysisReporter.go b/output/analysisReporter.go index 9ee46bc..135c3a8 100644 --- a/output/analysisReporter.go +++ b/output/analysisReporter.go @@ -156,7 +156,7 @@ 
func (r *AnalysisReporter) ConsumeMemoryScanProgress(progress <-chan *yapscan.Me if prog.Error != nil { jsonErr = prog.Error.Error() } - err = encoder.Encode(&MemoryScanProgressReport{ + err = encoder.Encode(&report.MemoryScan{ PID: info.PID, MemorySegment: prog.MemorySegment.BaseAddress, Matches: ConvertYaraMatchRules(prog.Matches), @@ -206,7 +206,7 @@ func (r *AnalysisReporter) ConsumeFSScanProgress(progress <-chan *fileio.FSScanP } } - err = encoder.Encode(&FSScanProgressReport{ + err = encoder.Encode(&report.FileScan{ File: prog.File, Matches: ConvertYaraMatchRules(prog.Matches), Error: jsonErr, diff --git a/output/output.go b/output/output.go index ef743fb..483b0c2 100644 --- a/output/output.go +++ b/output/output.go @@ -3,6 +3,8 @@ package output import ( "io" + "github.com/fkie-cad/yapscan/report" + "github.com/fkie-cad/yapscan" "github.com/fkie-cad/yapscan/fileio" "github.com/fkie-cad/yapscan/system" @@ -19,32 +21,18 @@ type Reporter interface { io.Closer } -// Match represents the match of a yara Rule. -type Match struct { - Rule string `json:"rule"` - Namespace string `json:"namespace"` - Strings []*MatchString `json:"strings"` -} - -// A MatchString represents a string declared and matched in a rule. -type MatchString struct { - Name string `json:"name"` - Base uint64 `json:"base"` - Offset uint64 `json:"offset"` -} - // ConvertYaraMatchRules converts the given slice of yara.MatchRule to // a slice of *Match. 
-func ConvertYaraMatchRules(mr []yara.MatchRule) []*Match { - ret := make([]*Match, len(mr)) +func ConvertYaraMatchRules(mr []yara.MatchRule) []*report.Match { + ret := make([]*report.Match, len(mr)) for i, match := range mr { - ret[i] = &Match{ + ret[i] = &report.Match{ Rule: match.Rule, Namespace: match.Namespace, - Strings: make([]*MatchString, len(match.Strings)), + Strings: make([]*report.MatchString, len(match.Strings)), } for j, s := range match.Strings { - ret[i].Strings[j] = &MatchString{ + ret[i].Strings[j] = &report.MatchString{ Name: s.Name, Base: s.Base, Offset: s.Offset, @@ -53,19 +41,3 @@ func ConvertYaraMatchRules(mr []yara.MatchRule) []*Match { } return ret } - -// MemoryScanProgressReport represents all matches on a single memory -// segment of a process. -type MemoryScanProgressReport struct { - PID int `json:"pid"` - MemorySegment uintptr `json:"memorySegment"` - Matches []*Match `json:"match"` - Error interface{} `json:"error"` -} - -// FSScanProgressReport represents all matches on a file. 
-type FSScanProgressReport struct { - File fileio.File `json:"file"` - Matches []*Match `json:"match"` - Error interface{} `json:"error"` -} diff --git a/report/report.go b/report/report.go new file mode 100644 index 0000000..9973b2c --- /dev/null +++ b/report/report.go @@ -0,0 +1,140 @@ +package report + +import ( + "github.com/fkie-cad/yapscan/arch" + "github.com/fkie-cad/yapscan/fileio" + "github.com/fkie-cad/yapscan/procio" +) + +type Report struct { + Meta *MetaInformation + Stats *ScanningStatistics + SystemInfo *SystemInfo + Processes []*ProcessInfo + MemoryScans []*MemoryScan + FileScans []*FileScan +} + +type ProfilingInformation struct { + Time Time `json:"time"` + FreeRAM uintptr `json:"freeRAM"` + FreeSwap uintptr `json:"freeSwap"` + LoadAvgOneMinute float64 `json:"loadAvgOneMinute"` + LoadAvgFiveMinutes float64 `json:"loadAvgFiveMinutes"` + LoadAvgFifteenMinutes float64 `json:"loadAvgFifteenMinutes"` +} + +// ScanningStatistics holds statistic information about a scan. +type ScanningStatistics struct { + Start Time `json:"start"` + End Time `json:"end"` + NumberOfProcessesScanned uint64 `json:"numberOfProcessesScanned"` + NumberOfSegmentsScanned uint64 `json:"numberOfSegmentsScanned"` + NumberOfMemoryBytesScanned uint64 `json:"numberOfMemoryBytesScanned"` + NumberOfFileBytesScanned uint64 `json:"numberOfFileBytesScanned"` + NumberOfFilesScanned uint64 `json:"numberOfFilesScanned"` + ProfilingInformation []*ProfilingInformation `json:"profilingInformation"` +} + +// ProcessInfo represents information about a Process. +type ProcessInfo struct { + PID int `json:"pid"` + Bitness arch.Bitness `json:"bitness"` + ExecutablePath string `json:"executablePath"` + ExecutableMD5 string `json:"executableMD5"` + ExecutableSHA256 string `json:"executableSHA256"` + Username string `json:"username"` + MemorySegments []*MemorySegmentInfo `json:"memorySegments"` +} + +// MemorySegmentInfo contains information about a memory segment. 
+type MemorySegmentInfo struct { + // ParentBaseAddress is the base address of the parent segment. + // If no parent segment exists, this is equal to the BaseAddress. + // Equivalence on windows: _MEMORY_BASIC_INFORMATION->AllocationBase + ParentBaseAddress uintptr `json:"parentBaseAddress"` + + // BaseAddress is the base address of the current memory segment. + // Equivalence on windows: _MEMORY_BASIC_INFORMATION->BaseAddress + BaseAddress uintptr `json:"baseAddress"` + + // AllocatedPermissions is the Permissions that were used to initially + // allocate this segment. + // Equivalence on windows: _MEMORY_BASIC_INFORMATION->AllocationProtect + AllocatedPermissions procio.Permissions `json:"allocatedPermissions"` + + // CurrentPermissions is the Permissions that the segment currently has. + // This may differ from AllocatedPermissions if the permissions where changed + // at some point (e.g. via VirtualProtect). + // Equivalence on windows: _MEMORY_BASIC_INFORMATION->Protect + CurrentPermissions procio.Permissions `json:"currentPermissions"` + + // Size contains the size of the segment in bytes. + // Equivalence on windows: _MEMORY_BASIC_INFORMATION->RegionSize + Size uintptr `json:"size"` + + // RSS contains the ResidentSetSize as reported on linux, i.e. + // the amount of RAM this segment actually uses right now. + // Equivalence on windows: No equivalence, this is currently always equal to Size. + RSS uintptr `json:"rss"` + + // State contains the current State of the segment. + // Equivalence on windows: _MEMORY_BASIC_INFORMATION->State + State procio.State `json:"state"` + + // Type contains the Type of the segment. + // Equivalence on windows: _MEMORY_BASIC_INFORMATION->SegmentType + Type procio.SegmentType `json:"type"` + + // File contains the path to the mapped file, or empty string if + // no file mapping is associated with this memory segment. + MappedFile fileio.File `json:"mappedFile"` + + // SubSegments contains sub-segments, i.e. 
segment where their ParentBaseAddress + // is equal to this segments BaseAddress. + // If no such segments exist, this will be a slice of length 0. + SubSegments []*MemorySegmentInfo `json:"subSegments"` +} + +// SystemInfo contains information about the running system. +type SystemInfo struct { + OSName string `json:"osName"` + OSVersion string `json:"osVersion"` + OSFlavour string `json:"osFlavour"` + OSArch arch.T `json:"osArch"` + Hostname string `json:"hostname"` + IPs []string `json:"ips"` + NumCPUs int `json:"numCPUs"` + TotalRAM uintptr `json:"totalRAM"` + TotalSwap uintptr `json:"totalSwap"` +} + +// MemoryScan represents all matches on a single memory +// segment of a process. +type MemoryScan struct { + PID int `json:"pid"` + MemorySegment uintptr `json:"memorySegment"` + Matches []*Match `json:"match"` + Error interface{} `json:"error"` +} + +// FileScan represents all matches on a file. +type FileScan struct { + File fileio.File `json:"file"` + Matches []*Match `json:"match"` + Error interface{} `json:"error"` +} + +// Match represents the match of a yara Rule. +type Match struct { + Rule string `json:"rule"` + Namespace string `json:"namespace"` + Strings []*MatchString `json:"strings"` +} + +// A MatchString represents a string declared and matched in a rule. 
+type MatchString struct { + Name string `json:"name"` + Base uint64 `json:"base"` + Offset uint64 `json:"offset"` +} From d1260aa53237aa8ce7a847662f00bfacabed2301 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Wed, 5 Jan 2022 15:48:28 +0100 Subject: [PATCH 13/26] Falttens memory subsegment before reporting --- output/analysisReporter.go | 19 ++++++++++++++++--- procio/memory.go | 2 +- report/report.go | 5 ----- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/output/analysisReporter.go b/output/analysisReporter.go index 135c3a8..10c1ba8 100644 --- a/output/analysisReporter.go +++ b/output/analysisReporter.go @@ -93,8 +93,19 @@ func (r *AnalysisReporter) ReportRules(rules *yara.Rules) error { return w.Close() } +func (r *AnalysisReporter) flattenSubsegments(segments []*procio.MemorySegmentInfo) []*procio.MemorySegmentInfo { + newSegments := make([]*procio.MemorySegmentInfo, 0, len(segments)) + for _, seg := range segments { + if len(seg.SubSegments) > 0 { + subSegments := r.flattenSubsegments(seg.SubSegments) + newSegments = append(newSegments, subSegments...) + } + } + return newSegments +} + func (r *AnalysisReporter) reportProcessInfos() error { - w, err := r.archiver.Create(r.filenamePrefix + report.ProcessFileName) + w, err := r.archiver.Create(r.filenamePrefix + report.ProcessesFileName) if err != nil { return err } @@ -106,6 +117,8 @@ func (r *AnalysisReporter) reportProcessInfos() error { encoder := json.NewEncoder(w) for _, info := range r.processInfos { + info.MemorySegments = r.flattenSubsegments(info.MemorySegments) + err = encoder.Encode(info) if err != nil { logrus.WithError(err).Error("Could not report process info.") @@ -120,7 +133,7 @@ func (r *AnalysisReporter) reportProcessInfos() error { // This function may only called once, otherwise the behaviour depends on the // used Archiver. 
func (r *AnalysisReporter) ConsumeMemoryScanProgress(progress <-chan *yapscan.MemoryScanProgress) error { - w, err := r.archiver.Create(r.filenamePrefix + report.MemoryProgressFileName) + w, err := r.archiver.Create(r.filenamePrefix + report.MemoryScansFileName) if err != nil { return err } @@ -184,7 +197,7 @@ func (r *AnalysisReporter) ConsumeMemoryScanProgress(progress <-chan *yapscan.Me // This function may only called once, otherwise the behaviour depends on the // used Archiver. func (r *AnalysisReporter) ConsumeFSScanProgress(progress <-chan *fileio.FSScanProgress) error { - w, err := r.archiver.Create(r.filenamePrefix + report.FSProgressFileName) + w, err := r.archiver.Create(r.filenamePrefix + report.FileScansFileName) if err != nil { return err } diff --git a/procio/memory.go b/procio/memory.go index d52a448..5689041 100644 --- a/procio/memory.go +++ b/procio/memory.go @@ -54,7 +54,7 @@ type MemorySegmentInfo struct { // SubSegments contains sub-segments, i.e. segment where their ParentBaseAddress // is equal to this segments BaseAddress. // If no such segments exist, this will be a slice of length 0. - SubSegments []*MemorySegmentInfo `json:"subSegments"` + SubSegments []*MemorySegmentInfo `json:"-"` } // EstimateRAMIncreaseByScanning estimates the increase in RAM usage when diff --git a/report/report.go b/report/report.go index 9973b2c..421faa6 100644 --- a/report/report.go +++ b/report/report.go @@ -89,11 +89,6 @@ type MemorySegmentInfo struct { // File contains the path to the mapped file, or empty string if // no file mapping is associated with this memory segment. MappedFile fileio.File `json:"mappedFile"` - - // SubSegments contains sub-segments, i.e. segment where their ParentBaseAddress - // is equal to this segments BaseAddress. - // If no such segments exist, this will be a slice of length 0. - SubSegments []*MemorySegmentInfo `json:"subSegments"` } // SystemInfo contains information about the running system. 
From ac98252f96fcf1f8edeb4b49d9808ff570264d70 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Wed, 5 Jan 2022 15:49:06 +0100 Subject: [PATCH 14/26] Fixes schemas --- report/meta.go | 27 +++++++++++++---------- report/v1.0.0/file-scans.schema.json | 4 ++-- report/v1.0.0/match.schema.json | 2 +- report/v1.0.0/memory-scans.schema.json | 4 ++-- report/v1.0.0/meta.schema.json | 2 +- report/v1.0.0/permissions.schema.json | 2 +- report/v1.0.0/processes.schema.json | 30 +++++++++++++++++++++----- report/v1.0.0/stats.schema.json | 2 +- report/v1.0.0/systeminfo.schema.json | 2 +- 9 files changed, 50 insertions(+), 25 deletions(-) diff --git a/report/meta.go b/report/meta.go index 086272e..b42197e 100644 --- a/report/meta.go +++ b/report/meta.go @@ -13,14 +13,14 @@ const SystemInfoFileName = "systeminfo.json" // RulesFileName is the name of the file, where the used rules will be stored. const RulesFileName = "rules.yarc" -// ProcessFileName is the name of the file used to report information about processes. -const ProcessFileName = "processes.json" +// ProcessesFileName is the name of the file used to report information about processes. +const ProcessesFileName = "processes.json" -// MemoryProgressFileName is the name of the file used to report information about memory scans. -const MemoryProgressFileName = "memory-scans.json" +// MemoryScansFileName is the name of the file used to report information about memory scans. +const MemoryScansFileName = "memory-scans.json" -// FSProgressFileName is the name of the file used to report information about file scans. -const FSProgressFileName = "file-scans.json" +// FileScansFileName is the name of the file used to report information about file scans. +const FileScansFileName = "file-scans.json" // ScanningStatisticsFileName is the name of the file used to report scanning. 
const ScanningStatisticsFileName = "stats.json" @@ -33,7 +33,12 @@ var FormatVersion = version.Version{ Minor: 0, Bugfix: 0, } -var schemaURLBase = "https://yapscan.targodan.de/reportFormat/%s/%s" + +const schemaURLBase = "https://yapscan.targodan.de/reportFormat" + +var schemaURLFormat = schemaURLBase + "/v%s/%s" + +var MetaV1Schema = fmt.Sprintf(schemaURLFormat, "1.0.0", "meta.schema.json") type MetaInformation struct { YapscanVersion version.Version `json:"yapscanVersion"` @@ -46,7 +51,7 @@ func generateSchemaURLs(files []string) map[string]string { for _, file := range files { fileParts := strings.Split(file, ".") schemaFile := strings.Join(fileParts[0:len(fileParts)-1], ".") + ".schema." + fileParts[len(fileParts)-1] - ret[file] = fmt.Sprintf(schemaURLBase, FormatVersion, schemaFile) + ret[file] = fmt.Sprintf(schemaURLFormat, FormatVersion, schemaFile) } return ret } @@ -57,9 +62,9 @@ func GetMetaInformation() *MetaInformation { FormatVersion: FormatVersion, SchemaURLs: generateSchemaURLs([]string{ SystemInfoFileName, - ProcessFileName, - MemoryProgressFileName, - FSProgressFileName, + ProcessesFileName, + MemoryScansFileName, + FileScansFileName, ScanningStatisticsFileName, MetaFileName, }), diff --git a/report/v1.0.0/file-scans.schema.json b/report/v1.0.0/file-scans.schema.json index 575d775..e08203e 100644 --- a/report/v1.0.0/file-scans.schema.json +++ b/report/v1.0.0/file-scans.schema.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/match.schema.json", + "$id": "https://yapscan.targodan.de/reportFormat/v1.0.0/match.schema.json", "title": "file-scans", "description": "Scan results of file-scans. For each scanned file, where either a match was found or an error was emitted, one JSON object per line is stored.", "type": "object", @@ -29,7 +29,7 @@ "type": "array", "description": "Contains information about matched rules. 
Is empty-array if no rules matched.", "items": { - "$ref": "https://yapscan.targodan.de/reportFormat/1.0.0/match.schema.json" + "$ref": "https://yapscan.targodan.de/reportFormat/v1.0.0/match.schema.json" } }, "error": { diff --git a/report/v1.0.0/match.schema.json b/report/v1.0.0/match.schema.json index 57675c0..96af72c 100644 --- a/report/v1.0.0/match.schema.json +++ b/report/v1.0.0/match.schema.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/match.schema.json", + "$id": "https://yapscan.targodan.de/reportFormat/v1.0.0/match.schema.json", "title": "match", "description": "Information about a yara rule match", "type": "object", diff --git a/report/v1.0.0/memory-scans.schema.json b/report/v1.0.0/memory-scans.schema.json index 7871016..6879e7c 100644 --- a/report/v1.0.0/memory-scans.schema.json +++ b/report/v1.0.0/memory-scans.schema.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/memory-scans.schema.json", + "$id": "https://yapscan.targodan.de/reportFormat/v1.0.0/memory-scans.schema.json", "title": "memory-scans", "description": "Memory scan results. For each scanned memory section, where either a match was found or an error was emitted, one JSON object per line is stored.", "type": "object", @@ -9,7 +9,7 @@ "type": "array", "description": "Contains information about matched rules. 
Is empty-array if no rules matched.", "items": { - "$ref": "https://yapscan.targodan.de/reportFormat/1.0.0/match.schema.json" + "$ref": "https://yapscan.targodan.de/reportFormat/v1.0.0/match.schema.json" } }, "pid": { diff --git a/report/v1.0.0/meta.schema.json b/report/v1.0.0/meta.schema.json index 5b7afd1..5bc766f 100644 --- a/report/v1.0.0/meta.schema.json +++ b/report/v1.0.0/meta.schema.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/meta.schema.json", + "$id": "https://yapscan.targodan.de/reportFormat/v1.0.0/meta.schema.json", "title": "meta", "description": "Metainformation about the yapscan report.", "type": "object", diff --git a/report/v1.0.0/permissions.schema.json b/report/v1.0.0/permissions.schema.json index 32732d6..117808e 100644 --- a/report/v1.0.0/permissions.schema.json +++ b/report/v1.0.0/permissions.schema.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/permissions.schema.json", + "$id": "https://yapscan.targodan.de/reportFormat/v1.0.0/permissions.schema.json", "title": "permissions", "description": "Permissions of a memory segment", "type": "object", diff --git a/report/v1.0.0/processes.schema.json b/report/v1.0.0/processes.schema.json index e0b5870..dcafb9e 100644 --- a/report/v1.0.0/processes.schema.json +++ b/report/v1.0.0/processes.schema.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/processes.schema.json", + "$id": "https://yapscan.targodan.de/reportFormat/v1.0.0/processes.schema.json", "title": "processes", "description": "Information about running processes of a scanned system", "type": "object", @@ -59,11 +59,11 @@ "description": "Address of this segment. Note, this value can get very large. make sure your parser uses an int64." 
}, "allocatedPermissions": { - "$ref": "https://yapscan.targodan.de/reportFormat/1.0.0/permissions.schema.json", + "$ref": "https://yapscan.targodan.de/reportFormat/v1.0.0/permissions.schema.json", "description": "The permissions, this segment was initialized with" }, "currentPermissions": { - "$ref": "https://yapscan.targodan.de/reportFormat/1.0.0/permissions.schema.json", + "$ref": "https://yapscan.targodan.de/reportFormat/v1.0.0/permissions.schema.json", "description": "The permissions, this segment had during time of the scan" }, "size": { @@ -83,8 +83,28 @@ "description": "The type of the memory segment" }, "mappedFile": { - "type": ["string", "null"], - "description": "The path to the mapped file, if the memory segment has a backing-file" + "oneOf": [ + { + "type": "object", + "properties": { + "SHA256": { + "type": "string", + "description": "SHA256 hexdigest of the mapped file, if the file could be read" + }, + "path": { + "type": "string", + "description": "The path of the mapped file" + }, + "MD5": { + "type": "string", + "description": "MD5 hexdigest of the mapped file, if the file could be read" + } + }, + "required": ["path"], + "additionalProperties": false + }, + {"type": "null"} + ] } }, "required": ["parentBaseAddress", "baseAddress", "allocatedPermissions", "currentPermissions", "size", "rss", "state", "type", "mappedFile"], diff --git a/report/v1.0.0/stats.schema.json b/report/v1.0.0/stats.schema.json index cfb98e1..3bc7413 100644 --- a/report/v1.0.0/stats.schema.json +++ b/report/v1.0.0/stats.schema.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/stats.schema.json", + "$id": "https://yapscan.targodan.de/reportFormat/v1.0.0/stats.schema.json", "title": "stats", "description": "Statistic information about the Yapscan run", "type": "object", diff --git a/report/v1.0.0/systeminfo.schema.json b/report/v1.0.0/systeminfo.schema.json index be42395..9adfb89 100644 
--- a/report/v1.0.0/systeminfo.schema.json +++ b/report/v1.0.0/systeminfo.schema.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://yapscan.targodan.de/reportFormat/1.0.0/systeminfo.schema.json", + "$id": "https://yapscan.targodan.de/reportFormat/v1.0.0/systeminfo.schema.json", "title": "Yapscan Systeminfo", "description": "System information gathered by Yapscan", "type": "object", From 50fc97b725ee3821075e49da4c14ddb081300c94 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Wed, 5 Jan 2022 15:49:20 +0100 Subject: [PATCH 15/26] Fixes time unmarshaling --- report/time.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/report/time.go b/report/time.go index 4449acb..b061a1d 100644 --- a/report/time.go +++ b/report/time.go @@ -31,7 +31,7 @@ func (t *Time) UnmarshalJSON(b []byte) error { return fmt.Errorf("expected a JSON-string as Time, %w", err) } - tmp, err := time.Parse(Format, string(b)) + tmp, err := time.Parse(Format, s) t.Time = tmp return err } From b1a4df7622fa4f6baaa1ae87d627d2bbf2f8fd25 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Wed, 5 Jan 2022 15:50:04 +0100 Subject: [PATCH 16/26] Adds Report-Reader, -Parser, and -Validator --- acceptanceTests/reports_test.go | 248 +++++++++++++++----------------- go.mod | 2 + go.sum | 4 + report/parser.go | 160 +++++++++++++++++++++ report/reader.go | 190 ++++++++++++++++++++++++ report/validator.go | 162 +++++++++++++++++++++ 6 files changed, 633 insertions(+), 133 deletions(-) create mode 100644 report/parser.go create mode 100644 report/reader.go create mode 100644 report/validator.go diff --git a/acceptanceTests/reports_test.go b/acceptanceTests/reports_test.go index f55e68d..5071266 100644 --- a/acceptanceTests/reports_test.go +++ b/acceptanceTests/reports_test.go @@ -4,7 +4,6 @@ import ( "archive/tar" "bytes" "context" - "encoding/json" "fmt" "io" "io/ioutil" @@ -15,6 +14,8 @@ import ( "testing" "testing/quick" + 
"github.com/fkie-cad/yapscan/testutil" + "github.com/fkie-cad/yapscan/report" "github.com/fkie-cad/yapscan/procio" @@ -169,7 +170,7 @@ func TestFullReportIsWritten_Unencrypted(t *testing.T) { cleanupCapture() conveyMatchWasSuccessful(c, addressOfData, err, stdout, stderr) - conveyReportIsReadable(c, openReportCleartext(), pid, addressOfData, reportDir) + conveyReportIsValidAndHasMatch(c, openReportCleartext(), pid, addressOfData, reportDir) }) } @@ -195,8 +196,8 @@ func TestFullReportIsWritten_Unencrypted_WhenScanningTwoProcesses(t *testing.T) cleanupCapture() conveyMatchWasSuccessful(c, addressOfMatchingData, err, stdout, stderr) - conveyReportIsReadable(c, openReportCleartext(), matchingPID, addressOfMatchingData, reportDir) - conveyReportIsReadableButDoesNotHaveMatch(c, openReportCleartext(), nonMatchingPID, addressOfNonMatchingData, reportDir) + conveyReportIsValidAndHasMatch(c, openReportCleartext(), matchingPID, addressOfMatchingData, reportDir) + conveyReportIsValidButDoesNotHaveMatch(c, openReportCleartext(), nonMatchingPID, addressOfNonMatchingData, reportDir) }) Convey("Scanning two prepared processes (first benign, then matching) with full-report", t, func(c C) { @@ -220,8 +221,8 @@ func TestFullReportIsWritten_Unencrypted_WhenScanningTwoProcesses(t *testing.T) cleanupCapture() conveyMatchWasSuccessful(c, addressOfMatchingData, err, stdout, stderr) - conveyReportIsReadable(c, openReportCleartext(), matchingPID, addressOfMatchingData, reportDir) - conveyReportIsReadableButDoesNotHaveMatch(c, openReportCleartext(), nonMatchingPID, addressOfNonMatchingData, reportDir) + conveyReportIsValidAndHasMatch(c, openReportCleartext(), matchingPID, addressOfMatchingData, reportDir) + conveyReportIsValidButDoesNotHaveMatch(c, openReportCleartext(), nonMatchingPID, addressOfNonMatchingData, reportDir) }) } @@ -250,7 +251,7 @@ func TestPasswordProtectedFullReport(t *testing.T) { conveyMatchWasSuccessful(c, addressOfData, err, stdout, stderr) conveyReportIsNotReadable(c, 
openReportCleartext(), reportDir) - conveyReportIsReadable(c, openReportWithPassword(password), pid, addressOfData, reportDir) + conveyReportIsValidAndHasMatch(c, openReportWithPassword(password), pid, addressOfData, reportDir) }) } @@ -279,7 +280,7 @@ func TestPGPProtectedFullReport(t *testing.T) { conveyMatchWasSuccessful(c, addressOfData, err, stdout, stderr) conveyReportIsNotReadable(c, openReportCleartext(), reportDir) - conveyReportIsReadable(c, openReportPGP(keyring), pid, addressOfData, reportDir) + conveyReportIsValidAndHasMatch(c, openReportPGP(keyring), pid, addressOfData, reportDir) }) } @@ -334,56 +335,31 @@ func (r *readerWithCloser) Close() error { return r.closer.Close() } -type reportOpenFunc func(reportPath string) (io.ReadCloser, error) +type reportOpenFunc func(reportPath string) (report.Reader, error) func openReportCleartext() reportOpenFunc { - return func(reportPath string) (io.ReadCloser, error) { - return os.Open(reportPath) + return func(reportPath string) (report.Reader, error) { + return report.NewFileReader(reportPath), nil } } func openReportWithPassword(password string) reportOpenFunc { - return func(reportPath string) (io.ReadCloser, error) { - f, err := os.Open(reportPath) - if err != nil { - return nil, err - } - - prompt := func(keys []openpgp.Key, symmetric bool) ([]byte, error) { - return []byte(password), nil - } - msg, err := openpgp.ReadMessage(f, nil, prompt, nil) - if err != nil { - f.Close() - return nil, err - } - return &readerWithCloser{ - rdr: msg.UnverifiedBody, - closer: f, - }, nil + return func(reportPath string) (report.Reader, error) { + rdr := report.NewFileReader(reportPath) + rdr.SetPassword(password) + return rdr, nil } } func openReportPGP(keyring openpgp.EntityList) reportOpenFunc { - return func(reportPath string) (io.ReadCloser, error) { - f, err := os.Open(reportPath) - if err != nil { - return nil, err - } - - msg, err := openpgp.ReadMessage(f, keyring, nil, nil) - if err != nil { - f.Close() - return 
nil, err - } - return &readerWithCloser{ - rdr: msg.UnverifiedBody, - closer: f, - }, nil + return func(reportPath string) (report.Reader, error) { + rdr := report.NewFileReader(reportPath) + rdr.SetKeyring(keyring) + return rdr, nil } } -func conveyReportIsReadable(c C, openReport reportOpenFunc, pid int, addressOfData uintptr, reportDir string) { +func conveyReportIsValidAndHasMatch(c C, openReport reportOpenFunc, pid int, addressOfData uintptr, reportDir string) { c.Convey("should yield a valid report", func(c C) { reportPath, exists := findReportPath(reportDir) @@ -392,36 +368,22 @@ func conveyReportIsReadable(c C, openReport reportOpenFunc, pid int, addressOfDa return } - report, err := openReport(reportPath) + reportRdr, err := openReport(reportPath) So(err, ShouldBeNil) - defer report.Close() + defer reportRdr.Close() - reportFiles, err := readReport(report) + projectRoot, err := testutil.GetProjectRoot() + So(err, ShouldBeNil) - c.So(reportFiles, ShouldNotBeEmpty) - c.So(err, ShouldBeNil) + validator := report.NewOfflineValidator(projectRoot + "/report") + err = validator.ValidateReport(reportRdr) + So(err, ShouldBeNil) - var memoryScansJSON *file - filenames := make([]string, len(reportFiles)) - for i, file := range reportFiles { - filenames[i] = file.Name - if file.Name == "memory-scans.json" { - memoryScansJSON = file - } - } - c.Convey("which contains the expected files", func(c C) { - c.So(filenames, ShouldContain, "systeminfo.json") - c.So(filenames, ShouldContain, "processes.json") - c.So(filenames, ShouldContain, "memory-scans.json") - c.So(filenames, ShouldContain, "stats.json") - c.So(memoryScansJSON, ShouldNotBeNil) - - conveyReportHasMatch(c, pid, addressOfData, memoryScansJSON) - }) + conveyReportHasMatch(c, pid, addressOfData, reportRdr) }) } -func conveyReportIsReadableButDoesNotHaveMatch(c C, openReport reportOpenFunc, pid int, addressOfData uintptr, reportDir string) { +func conveyReportIsValidButDoesNotHaveMatch(c C, openReport 
reportOpenFunc, pid int, addressOfData uintptr, reportDir string) { c.Convey("should yield a readable report", func(c C) { reportPath, exists := findReportPath(reportDir) @@ -430,32 +392,18 @@ func conveyReportIsReadableButDoesNotHaveMatch(c C, openReport reportOpenFunc, p return } - report, err := openReport(reportPath) + reportRdr, err := openReport(reportPath) So(err, ShouldBeNil) - defer report.Close() + defer reportRdr.Close() - reportFiles, err := readReport(report) + projectRoot, err := testutil.GetProjectRoot() + So(err, ShouldBeNil) - c.So(reportFiles, ShouldNotBeEmpty) - c.So(err, ShouldBeNil) + validator := report.NewOfflineValidator(projectRoot + "/report") + err = validator.ValidateReport(reportRdr) + So(err, ShouldBeNil) - var memoryScansJSON *file - filenames := make([]string, len(reportFiles)) - for i, file := range reportFiles { - filenames[i] = file.Name - if file.Name == "memory-scans.json" { - memoryScansJSON = file - } - } - c.Convey("which contains the expected files", func(c C) { - c.So(filenames, ShouldContain, "systeminfo.json") - c.So(filenames, ShouldContain, "processes.json") - c.So(filenames, ShouldContain, "memory-scans.json") - c.So(filenames, ShouldContain, "stats.json") - c.So(memoryScansJSON, ShouldNotBeNil) - - conveyReportDoesNotHaveMatch(c, pid, addressOfData, memoryScansJSON) - }) + conveyReportDoesNotHaveMatch(c, pid, addressOfData, reportRdr) }) } @@ -468,14 +416,16 @@ func conveyReportIsAnonymized(c C, openReport reportOpenFunc, reportDir string) return } - report, err := openReport(reportPath) + reportRdr, err := openReport(reportPath) So(err, ShouldBeNil) - defer report.Close() + defer reportRdr.Close() - reportFiles, err := readReport(report) + projectRoot, err := testutil.GetProjectRoot() + So(err, ShouldBeNil) - c.So(reportFiles, ShouldNotBeEmpty) - c.So(err, ShouldBeNil) + validator := report.NewOfflineValidator(projectRoot + "/report") + err = validator.ValidateReport(reportRdr) + So(err, ShouldBeNil) c.Convey("which 
does not contain the hostname, username or any IPs.", func(c C) { info, err := system.GetInfo() @@ -487,15 +437,45 @@ func conveyReportIsAnonymized(c C, openReport reportOpenFunc, reportDir string) selfInfo, err := self.Info() So(err, ShouldBeNil) - allJSONBuilder := &strings.Builder{} - for _, file := range reportFiles { - if strings.Contains(file.Name, ".json") { - allJSONBuilder.Write(file.Data) - } - } - allJSON := allJSONBuilder.String() + buffer := &bytes.Buffer{} - fmt.Println(info.Hostname) + r, err := reportRdr.OpenMeta() + So(err, ShouldBeNil) + _, err = io.Copy(buffer, r) + So(err, ShouldBeNil) + r.Close() + + r, err = reportRdr.OpenStatistics() + So(err, ShouldBeNil) + _, err = io.Copy(buffer, r) + So(err, ShouldBeNil) + r.Close() + + r, err = reportRdr.OpenSystemInformation() + So(err, ShouldBeNil) + _, err = io.Copy(buffer, r) + So(err, ShouldBeNil) + r.Close() + + r, err = reportRdr.OpenProcesses() + So(err, ShouldBeNil) + _, err = io.Copy(buffer, r) + So(err, ShouldBeNil) + r.Close() + + r, err = reportRdr.OpenMemoryScans() + So(err, ShouldBeNil) + _, err = io.Copy(buffer, r) + So(err, ShouldBeNil) + r.Close() + + r, err = reportRdr.OpenFileScans() + So(err, ShouldBeNil) + _, err = io.Copy(buffer, r) + So(err, ShouldBeNil) + r.Close() + + allJSON := buffer.String() So(allJSON, ShouldNotBeEmpty) So(allJSON, ShouldNotContainSubstring, info.Hostname) @@ -516,60 +496,62 @@ func conveyReportIsNotReadable(c C, openReport reportOpenFunc, reportDir string) return } - report, err := openReport(reportPath) + reportRdr, err := openReport(reportPath) if err != nil { So(err, ShouldNotBeNil) return } - defer report.Close() - - _, err = readReport(report) - c.So(err, ShouldNotBeNil) + defer reportRdr.Close() + + _, errMeta := reportRdr.OpenMeta() + _, errStatistics := reportRdr.OpenStatistics() + _, errSystemInformation := reportRdr.OpenSystemInformation() + _, errProcesses := reportRdr.OpenProcesses() + _, errMemoryScans := reportRdr.OpenMemoryScans() + _, 
errFileScans := reportRdr.OpenFileScans() + So(errMeta, ShouldNotBeNil) + So(errStatistics, ShouldNotBeNil) + So(errSystemInformation, ShouldNotBeNil) + So(errProcesses, ShouldNotBeNil) + So(errMemoryScans, ShouldNotBeNil) + So(errFileScans, ShouldNotBeNil) }) } -func conveyReportHasMatch(c C, pid int, addressOfData uintptr, memoryScansJSON *file) { +func conveyReportHasMatch(c C, pid int, addressOfData uintptr, reportRdr report.Reader) { c.Convey("with the memory-scans.json containing the correct match.", func() { - dec := json.NewDecoder(bytes.NewReader(memoryScansJSON.Data)) - foundCorrectMatch := false - var err error - for { - re := new(report.MemoryScan) - err = dec.Decode(re) - if err != nil { - break - } + parser := report.NewParser() + rprt, err := parser.Parse(reportRdr) + So(err, ShouldBeNil) - if re.PID == pid && re.MemorySegment == addressOfData && len(re.Matches) > 0 { + foundCorrectMatch := false + for _, scan := range rprt.MemoryScans { + if scan.PID == pid && scan.MemorySegment == addressOfData && len(scan.Matches) > 0 { foundCorrectMatch = true + break } } - c.So(err, ShouldResemble, io.EOF) c.So(foundCorrectMatch, ShouldBeTrue) }) } -func conveyReportDoesNotHaveMatch(c C, pid int, addressOfData uintptr, memoryScansJSON *file) { +func conveyReportDoesNotHaveMatch(c C, pid int, addressOfData uintptr, reportRdr report.Reader) { c.Convey("with the memory-scans.json not containing a false positive.", func() { - dec := json.NewDecoder(bytes.NewReader(memoryScansJSON.Data)) + parser := report.NewParser() + rprt, err := parser.Parse(reportRdr) + So(err, ShouldBeNil) + foundMatchForPID := false foundMatchForAddressInPID := false - var err error - for { - re := new(report.MemoryScan) - err = dec.Decode(re) - if err != nil { - break - } - - if re.PID == pid && len(re.Matches) > 0 { + for _, scan := range rprt.MemoryScans { + if scan.PID == pid && len(scan.Matches) > 0 { foundMatchForPID = true - if re.MemorySegment == addressOfData { + if scan.MemorySegment 
== addressOfData { foundMatchForAddressInPID = true + break } } } - c.So(err, ShouldResemble, io.EOF) c.So(foundMatchForPID, ShouldBeFalse) c.So(foundMatchForAddressInPID, ShouldBeFalse) }) diff --git a/go.mod b/go.mod index dac8132..1588c1d 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,8 @@ require ( github.com/klauspost/compress v1.13.6 github.com/kr/text v0.2.0 // indirect github.com/mattn/go-colorable v0.1.12 // indirect + github.com/santhosh-tekuri/jsonschema v1.2.4 // indirect + github.com/santhosh-tekuri/jsonschema/v5 v5.0.0 github.com/sirupsen/logrus v1.8.1 github.com/smartystreets/assertions v1.2.0 // indirect github.com/smartystreets/goconvey v1.6.4 diff --git a/go.sum b/go.sum index 7a6b183..8b4cdce 100644 --- a/go.sum +++ b/go.sum @@ -172,6 +172,10 @@ github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/santhosh-tekuri/jsonschema v1.2.4 h1:hNhW8e7t+H1vgY+1QeEQpveR6D4+OwKPXCfD2aieJis= +github.com/santhosh-tekuri/jsonschema v1.2.4/go.mod h1:TEAUOeZSmIxTTuHatJzrvARHiuO9LYd+cIxzgEHCQI4= +github.com/santhosh-tekuri/jsonschema/v5 v5.0.0 h1:TToq11gyfNlrMFZiYujSekIsPd9AmsA2Bj/iv+s4JHE= +github.com/santhosh-tekuri/jsonschema/v5 v5.0.0/go.mod h1:FKdcjfQW6rpZSnxxUvEA5H/cDPdvJ/SZJQLWWXWGrZ0= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/shurcooL/go v0.0.0-20200502201357-93f07166e636/go.mod h1:TDJrrUr11Vxrven61rcy3hJMUqaf/CLWYhHNPmT14Lk= github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749/go.mod h1:ZY1cvUeJuFPAdZ/B6v7RHavJWZn2YPVFQ1OSXhCGOkg= diff --git a/report/parser.go b/report/parser.go new file mode 100644 index 0000000..2ab500b --- 
/dev/null +++ b/report/parser.go @@ -0,0 +1,160 @@ +package report + +import ( + "encoding/json" + "fmt" + "io" +) + +type Parser struct{} + +func NewParser() *Parser { + return &Parser{} +} + +func (p *Parser) Parse(rdr Reader) (*Report, error) { + meta, err := p.parseMeta(rdr) + if err != nil { + return nil, err + } + + if meta.FormatVersion.String() != "1.0.0" { + return nil, fmt.Errorf("unsupported report version \"%v\", expected \"1.0.0\"", meta.FormatVersion) + } + + stats, err := p.parseStatistics(rdr) + if err != nil { + return nil, err + } + + sysInfo, err := p.parseSystemInformation(rdr) + if err != nil { + return nil, err + } + + processes, err := p.parseProcesses(rdr) + if err != nil { + return nil, err + } + + memScans, err := p.parseMemoryScans(rdr) + if err != nil { + return nil, err + } + + fileScans, err := p.parseFileScans(rdr) + if err != nil { + return nil, err + } + + return &Report{ + Meta: meta, + Stats: stats, + SystemInfo: sysInfo, + Processes: processes, + MemoryScans: memScans, + FileScans: fileScans, + }, nil +} + +func (p *Parser) parseMeta(rdr Reader) (*MetaInformation, error) { + r, err := rdr.OpenMeta() + if err != nil { + return nil, err + } + var data MetaInformation + err = json.NewDecoder(r).Decode(&data) + return &data, err +} + +func (p *Parser) parseStatistics(rdr Reader) (*ScanningStatistics, error) { + r, err := rdr.OpenStatistics() + if err != nil { + return nil, err + } + var data ScanningStatistics + err = json.NewDecoder(r).Decode(&data) + return &data, err +} + +func (p *Parser) parseSystemInformation(rdr Reader) (*SystemInfo, error) { + r, err := rdr.OpenSystemInformation() + if err != nil { + return nil, err + } + var data SystemInfo + err = json.NewDecoder(r).Decode(&data) + return &data, err +} + +func (p *Parser) parseProcesses(rdr Reader) ([]*ProcessInfo, error) { + r, err := rdr.OpenProcesses() + if err != nil { + return nil, err + } + + decoder := json.NewDecoder(r) + + data := make([]*ProcessInfo, 0) + for { + 
var obj ProcessInfo + err = decoder.Decode(&obj) + if err != nil { + break + } + data = append(data, &obj) + } + if err != io.EOF { + return nil, err + } + + return data, nil +} + +func (p *Parser) parseMemoryScans(rdr Reader) ([]*MemoryScan, error) { + r, err := rdr.OpenMemoryScans() + if err != nil { + return nil, err + } + + decoder := json.NewDecoder(r) + + data := make([]*MemoryScan, 0) + for { + var obj MemoryScan + err = decoder.Decode(&obj) + if err != nil { + break + } + data = append(data, &obj) + } + if err != io.EOF { + return nil, err + } + + return data, nil +} + +func (p *Parser) parseFileScans(rdr Reader) ([]*FileScan, error) { + r, err := rdr.OpenFileScans() + if err != nil { + return nil, err + } + + decoder := json.NewDecoder(r) + + data := make([]*FileScan, 0) + for { + var obj FileScan + err = decoder.Decode(&obj) + if err != nil { + break + } + data = append(data, &obj) + } + if err != io.EOF { + return nil, err + } + + return data, nil +} diff --git a/report/reader.go b/report/reader.go new file mode 100644 index 0000000..91f28f8 --- /dev/null +++ b/report/reader.go @@ -0,0 +1,190 @@ +package report + +import ( + "archive/tar" + "bytes" + "fmt" + "io" + "os" + "path/filepath" + + "golang.org/x/crypto/openpgp" + + "github.com/klauspost/compress/zstd" +) + +type Reader interface { + SetPassword(password string) + SetKeyring(keyring openpgp.KeyRing) + OpenMeta() (io.ReadCloser, error) + OpenSystemInformation() (io.ReadCloser, error) + OpenStatistics() (io.ReadCloser, error) + OpenProcesses() (io.ReadCloser, error) + OpenMemoryScans() (io.ReadCloser, error) + OpenFileScans() (io.ReadCloser, error) + io.Closer +} + +type FileReader struct { + path string + password string + keyring openpgp.KeyRing + + hasRead bool + lastError error + + metaBuffer []byte + statsBuffer []byte + systemInfoBuffer []byte + processesBuffer []byte + memoryScansBuffer []byte + fileScansBuffer []byte +} + +func NewFileReader(path string) Reader { + return &FileReader{ + 
path: path, + } +} + +func (rdr *FileReader) decryptIfNecessary(in io.Reader) (io.Reader, error) { + if rdr.password == "" && rdr.keyring == nil { + return in, nil + } + + var prompt openpgp.PromptFunction + + if rdr.password != "" { + prompt = func(keys []openpgp.Key, symmetric bool) ([]byte, error) { + return []byte(rdr.password), nil + } + } + + msg, err := openpgp.ReadMessage(in, rdr.keyring, prompt, nil) + if err != nil { + return nil, err + } + + return msg.UnverifiedBody, nil +} + +func (rdr *FileReader) readAll() { + if rdr.hasRead { + return + } + defer func() { + rdr.hasRead = true + }() + + file, err := os.Open(rdr.path) + if err != nil { + rdr.lastError = err + return + } + defer file.Close() + + fileRdr, err := rdr.decryptIfNecessary(file) + if err != nil { + rdr.lastError = err + return + } + + zstdRdr, err := zstd.NewReader(fileRdr) + if err != nil { + rdr.lastError = err + return + } + defer zstdRdr.Close() + + tarRdr := tar.NewReader(zstdRdr) + for { + var hdr *tar.Header + hdr, err = tarRdr.Next() + if err != nil { + break + } + if hdr.Typeflag == tar.TypeReg { + buf := &bytes.Buffer{} + if _, err = io.Copy(buf, tarRdr); err != nil { + break + } + + switch filepath.Base(hdr.Name) { + case MetaFileName: + rdr.metaBuffer = buf.Bytes() + case ScanningStatisticsFileName: + rdr.statsBuffer = buf.Bytes() + case SystemInfoFileName: + rdr.systemInfoBuffer = buf.Bytes() + case ProcessesFileName: + rdr.processesBuffer = buf.Bytes() + case MemoryScansFileName: + rdr.memoryScansBuffer = buf.Bytes() + case FileScansFileName: + rdr.fileScansBuffer = buf.Bytes() + } + } + } + + if err == io.EOF { + err = nil + } + + rdr.lastError = err +} + +func (rdr *FileReader) SetPassword(password string) { + rdr.password = password +} + +func (rdr *FileReader) SetKeyring(keyring openpgp.KeyRing) { + rdr.keyring = keyring +} + +func (rdr *FileReader) OpenMeta() (io.ReadCloser, error) { + rdr.readAll() + return io.NopCloser(bytes.NewReader(rdr.metaBuffer)), rdr.lastError +} + 
+func (rdr *FileReader) OpenSystemInformation() (io.ReadCloser, error) { + rdr.readAll() + return io.NopCloser(bytes.NewReader(rdr.systemInfoBuffer)), rdr.lastError +} + +func (rdr *FileReader) OpenStatistics() (io.ReadCloser, error) { + rdr.readAll() + return io.NopCloser(bytes.NewReader(rdr.statsBuffer)), rdr.lastError +} + +func (rdr *FileReader) OpenProcesses() (io.ReadCloser, error) { + rdr.readAll() + return io.NopCloser(bytes.NewReader(rdr.processesBuffer)), rdr.lastError +} + +func (rdr *FileReader) OpenMemoryScans() (io.ReadCloser, error) { + rdr.readAll() + return io.NopCloser(bytes.NewReader(rdr.memoryScansBuffer)), rdr.lastError +} + +func (rdr *FileReader) OpenFileScans() (io.ReadCloser, error) { + rdr.readAll() + return io.NopCloser(bytes.NewReader(rdr.fileScansBuffer)), rdr.lastError +} + +func (rdr *FileReader) Close() error { + return nil +} + +func ReadArmoredKeyring(path string) (openpgp.KeyRing, error) { + f, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("could not open keyring, reason: %w", err) + } + defer f.Close() + + keyring, err := openpgp.ReadArmoredKeyRing(f) + if err != nil { + return nil, fmt.Errorf("could not read keyring, reason: %w", err) + } + return keyring, nil +} diff --git a/report/validator.go b/report/validator.go new file mode 100644 index 0000000..5c8a79a --- /dev/null +++ b/report/validator.go @@ -0,0 +1,162 @@ +package report + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "strings" + + jsonschema "github.com/santhosh-tekuri/jsonschema/v5" + _ "github.com/santhosh-tekuri/jsonschema/v5/httploader" +) + +type Validator struct { + compiler *jsonschema.Compiler +} + +func newValidator() *Validator { + return &Validator{ + compiler: jsonschema.NewCompiler(), + } +} + +func NewOfflineValidator(schemaRootPath string) *Validator { + v := newValidator() + + schemaRootPath = strings.TrimRight(schemaRootPath, "/\\") + + v.compiler.LoadURL = func(url string) (io.ReadCloser, error) { + 
if strings.Index(url, schemaURLBase) != 0 { + return nil, fmt.Errorf("schema URL \"%s\" is invalid for yapscan reports", url) + } + path := schemaRootPath + url[len(schemaURLBase):] + return os.Open(path) + } + + return v +} + +func NewOnlineValidator(schemaRootPath string) *Validator { + return newValidator() +} + +func (v *Validator) ValidateReport(rdr Reader) error { + in, err := rdr.OpenMeta() + if err != nil { + return err + } + metaData, err := v.validateSingleObject(MetaV1Schema, in) + in.Close() + if err != nil { + return err + } + + schemaURLs := make(map[string]string) + for file, url := range metaData["schemaURLs"].(map[string]interface{}) { + schemaURLs[file] = url.(string) + } + + in, err = rdr.OpenSystemInformation() + if err != nil { + return err + } + _, err = v.validateSingleObject(schemaURLs[SystemInfoFileName], in) + in.Close() + if err != nil { + return err + } + + in, err = rdr.OpenStatistics() + if err != nil { + return err + } + _, err = v.validateSingleObject(schemaURLs[ScanningStatisticsFileName], in) + in.Close() + if err != nil { + return err + } + + in, err = rdr.OpenProcesses() + if err != nil { + return err + } + err = v.validateMultipleObjects(schemaURLs[ProcessesFileName], in) + in.Close() + if err != nil { + return err + } + + in, err = rdr.OpenMemoryScans() + if err != nil { + return err + } + err = v.validateMultipleObjects(schemaURLs[MemoryScansFileName], in) + in.Close() + if err != nil { + return err + } + + in, err = rdr.OpenFileScans() + if err != nil { + return err + } + err = v.validateMultipleObjects(schemaURLs[FileScansFileName], in) + in.Close() + return err +} + +func (v *Validator) validateSingleObject(schemaURL string, in io.Reader) (map[string]interface{}, error) { + metaSchema, err := v.compiler.Compile(schemaURL) + if err != nil { + return nil, err + } + + b, err := ioutil.ReadAll(in) + if err != nil { + return nil, err + } + + data := make(map[string]interface{}) + err = json.Unmarshal(b, &data) + if err != nil { 
+ return nil, err + } + + return data, metaSchema.Validate(data) +} + +func (v *Validator) validateMultipleObjects(schemaURL string, in io.Reader) error { + schema, err := v.compiler.Compile(schemaURL) + if err != nil { + return err + } + + rdr := bufio.NewReader(in) + for { + var line string + line, err = rdr.ReadString('\n') + line = strings.Trim(line, " \n\r\t") + if line != "" { + data := make(map[string]interface{}) + validationErr := json.Unmarshal([]byte(line), &data) + if validationErr != nil { + return validationErr + } + validationErr = schema.Validate(data) + if validationErr != nil { + return validationErr + } + } + if err != nil { + break + } + } + if err != nil && err != io.EOF { + return err + } + + return nil +} From f8be5641034d9c49c2fe1ef51e0bdb2e4a166f70 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Wed, 5 Jan 2022 16:05:12 +0100 Subject: [PATCH 17/26] Fixes time format --- report/time.go | 2 +- report/v1.0.0/stats.schema.json | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/report/time.go b/report/time.go index b061a1d..07a125a 100644 --- a/report/time.go +++ b/report/time.go @@ -6,7 +6,7 @@ import ( "time" ) -const Format = "2006-01-02T15:04:05.000000Z07:00" +const Format = "2006-01-02T15:04:05.000000Z-07:00" type Time struct { time.Time diff --git a/report/v1.0.0/stats.schema.json b/report/v1.0.0/stats.schema.json index 3bc7413..a36a464 100644 --- a/report/v1.0.0/stats.schema.json +++ b/report/v1.0.0/stats.schema.json @@ -11,7 +11,7 @@ }, "start": { "type": "string", - "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}[+-]\\d{2}:\\d{2}$", + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}Z[+-]\\d{2}:\\d{2}$", "description": "Datetime of the start of the scan. Format is RFC3339 with added micro seconds." 
}, "profilingInformation": { @@ -46,7 +46,7 @@ }, "time": { "type": "string", - "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}[+-]\\d{2}:\\d{2}$", + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}Z[+-]\\d{2}:\\d{2}$", "description": "Datetime of the stats snapshot. Format is RFC3339 with added micro seconds." } } @@ -54,7 +54,7 @@ }, "end": { "type": "string", - "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}[+-]\\d{2}:\\d{2}$", + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}Z[+-]\\d{2}:\\d{2}$", "description": "Datetime of the start of the scan. Format is RFC3339 with added micro seconds." }, "numberOfProcessesScanned": { From 75435fc84059fe67051e0cc1e9e1bb5eafa238d0 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Wed, 5 Jan 2022 16:14:11 +0100 Subject: [PATCH 18/26] Fixes flattening of memory segments --- output/analysisReporter.go | 10 +++++++++- report/report.go | 8 ++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/output/analysisReporter.go b/output/analysisReporter.go index 10c1ba8..b9534bf 100644 --- a/output/analysisReporter.go +++ b/output/analysisReporter.go @@ -15,6 +15,13 @@ import ( "github.com/targodan/go-errors" ) +// FileScan represents all matches on a file. +type FileScan struct { + File fileio.File `json:"file"` + Matches []*report.Match `json:"match"` + Error interface{} `json:"error"` +} + // AnalysisReporter implements a Reporter, which is // specifically intended for later analysis of the report // in order to determine rule quality. 
@@ -96,6 +103,7 @@ func (r *AnalysisReporter) ReportRules(rules *yara.Rules) error { func (r *AnalysisReporter) flattenSubsegments(segments []*procio.MemorySegmentInfo) []*procio.MemorySegmentInfo { newSegments := make([]*procio.MemorySegmentInfo, 0, len(segments)) for _, seg := range segments { + newSegments = append(newSegments, seg) if len(seg.SubSegments) > 0 { subSegments := r.flattenSubsegments(seg.SubSegments) newSegments = append(newSegments, subSegments...) @@ -219,7 +227,7 @@ func (r *AnalysisReporter) ConsumeFSScanProgress(progress <-chan *fileio.FSScanP } } - err = encoder.Encode(&report.FileScan{ + err = encoder.Encode(&FileScan{ File: prog.File, Matches: ConvertYaraMatchRules(prog.Matches), Error: jsonErr, diff --git a/report/report.go b/report/report.go index 421faa6..e2fa022 100644 --- a/report/report.go +++ b/report/report.go @@ -88,7 +88,7 @@ type MemorySegmentInfo struct { // File contains the path to the mapped file, or empty string if // no file mapping is associated with this memory segment. - MappedFile fileio.File `json:"mappedFile"` + MappedFile *fileio.OSFile `json:"mappedFile"` } // SystemInfo contains information about the running system. @@ -115,9 +115,9 @@ type MemoryScan struct { // FileScan represents all matches on a file. type FileScan struct { - File fileio.File `json:"file"` - Matches []*Match `json:"match"` - Error interface{} `json:"error"` + File *fileio.OSFile `json:"file"` + Matches []*Match `json:"match"` + Error interface{} `json:"error"` } // Match represents the match of a yara Rule. 
From 1dbb4c5282dbadf83ec6fd464f891c47327c2fe1 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Thu, 6 Jan 2022 08:14:06 +0100 Subject: [PATCH 19/26] Fixes schemas and reformats them slightly --- report/v1.0.0/match.schema.json | 4 +-- report/v1.0.0/memory-scans.schema.json | 4 +-- report/v1.0.0/meta.schema.json | 4 +-- report/v1.0.0/processes.schema.json | 8 ++---- report/v1.0.0/stats.schema.json | 38 ++++++++++++++------------ report/v1.0.0/systeminfo.schema.json | 4 +-- 6 files changed, 27 insertions(+), 35 deletions(-) diff --git a/report/v1.0.0/match.schema.json b/report/v1.0.0/match.schema.json index 96af72c..f6f742a 100644 --- a/report/v1.0.0/match.schema.json +++ b/report/v1.0.0/match.schema.json @@ -18,9 +18,7 @@ "type": "string", "description": "The name of the matched string as defined in the yara rule" }, - "base": { - "type": "number" - } + "base": {"type": "number"} }, "required": ["offset", "name", "base"], "additionalProperties": false diff --git a/report/v1.0.0/memory-scans.schema.json b/report/v1.0.0/memory-scans.schema.json index 6879e7c..c60dd8c 100644 --- a/report/v1.0.0/memory-scans.schema.json +++ b/report/v1.0.0/memory-scans.schema.json @@ -8,9 +8,7 @@ "match": { "type": "array", "description": "Contains information about matched rules. Is empty-array if no rules matched.", - "items": { - "$ref": "https://yapscan.targodan.de/reportFormat/v1.0.0/match.schema.json" - } + "items": {"$ref": "https://yapscan.targodan.de/reportFormat/v1.0.0/match.schema.json"} }, "pid": { "type": "integer", diff --git a/report/v1.0.0/meta.schema.json b/report/v1.0.0/meta.schema.json index 5bc766f..39c5423 100644 --- a/report/v1.0.0/meta.schema.json +++ b/report/v1.0.0/meta.schema.json @@ -13,9 +13,7 @@ "type": "object", "description": "SchemaURLs for the files of the report. 
There is one schema link for each JSON file contained in the report.", "patternProperties": { - "\\.json$": { - "type": "string" - } + "\\.json$": {"type": "string"} } }, "formatVersion": { diff --git a/report/v1.0.0/processes.schema.json b/report/v1.0.0/processes.schema.json index dcafb9e..0bb23b8 100644 --- a/report/v1.0.0/processes.schema.json +++ b/report/v1.0.0/processes.schema.json @@ -23,9 +23,7 @@ "type": "string", "pattern": "^[a-f0-9]{32}$" }, - { - "type": "null" - } + {"type": "null"} ], "description": "MD5 hexdigest of the executable file, if the file could be read" }, @@ -35,9 +33,7 @@ "type": "string", "pattern": "^[a-f0-9]{64}$" }, - { - "type": "null" - } + {"type": "null"} ], "description": "SHA256 hexdigest of the executable file, if the file could be read" }, diff --git a/report/v1.0.0/stats.schema.json b/report/v1.0.0/stats.schema.json index a36a464..1533aea 100644 --- a/report/v1.0.0/stats.schema.json +++ b/report/v1.0.0/stats.schema.json @@ -5,15 +5,16 @@ "description": "Statistic information about the Yapscan run", "type": "object", "properties": { - "numberOfFileBytesScanned": { - "type": "integer", - "description": "Number of total bytes scanned from files. Note, this value can get very large. make sure your parser uses an int64." - }, "start": { "type": "string", "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}Z[+-]\\d{2}:\\d{2}$", "description": "Datetime of the start of the scan. Format is RFC3339 with added micro seconds." }, + "end": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}Z[+-]\\d{2}:\\d{2}$", + "description": "Datetime of the start of the scan. Format is RFC3339 with added micro seconds." + }, "profilingInformation": { "type": "array", "items": { @@ -26,12 +27,6 @@ "type": "integer", "description": "The currently free swap in bytes. Note, this value can get very large. make sure your parser uses an int64." 
}, - "loadAvgFifteenMinutes": { - "type": "number", - "description": "The load average over the last fifteen minutes, normalized over the number of CPUs. Note, that on windows load checking start with the scan, thus this value will be inaccurate for the first 15 minutes of the scan.", - "minimum": 0.0, - "maximum": 1.0 - }, "loadAvgOneMinute": { "type": "number", "description": "The load average over the last minute, normalized over the number of CPUs. Note, that on windows load checking start with the scan, thus this value will be inaccurate for the first minute of the scan.", @@ -44,19 +39,22 @@ "minimum": 0.0, "maximum": 1.0 }, + "loadAvgFifteenMinutes": { + "type": "number", + "description": "The load average over the last fifteen minutes, normalized over the number of CPUs. Note, that on windows load checking start with the scan, thus this value will be inaccurate for the first 15 minutes of the scan.", + "minimum": 0.0, + "maximum": 1.0 + }, "time": { "type": "string", "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}Z[+-]\\d{2}:\\d{2}$", "description": "Datetime of the stats snapshot. Format is RFC3339 with added micro seconds." } - } + }, + "required": ["freeRAM", "freeSwap", "loadAvgOneMinute", "loadAvgFiveMinutes", "loadAvgFifteenMinutes", "time"], + "additionalProperties": false } }, - "end": { - "type": "string", - "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}Z[+-]\\d{2}:\\d{2}$", - "description": "Datetime of the start of the scan. Format is RFC3339 with added micro seconds." - }, "numberOfProcessesScanned": { "type": "number", "description": "The number of scanned processes" @@ -72,6 +70,12 @@ "numberOfFilesScanned": { "type": "number", "description": "The number of scanned files" + }, + "numberOfFileBytesScanned": { + "type": "integer", + "description": "Number of total bytes scanned from files. Note, this value can get very large. make sure your parser uses an int64." 
} - } + }, + "required": ["start", "end", "profilingInformation", "numberOfProcessesScanned", "numberOfSegmentsScanned", "numberOfMemoryBytesScanned", "numberOfFilesScanned", "numberOfFileBytesScanned"], + "additionalProperties": false } \ No newline at end of file diff --git a/report/v1.0.0/systeminfo.schema.json b/report/v1.0.0/systeminfo.schema.json index 9adfb89..7ad5bc6 100644 --- a/report/v1.0.0/systeminfo.schema.json +++ b/report/v1.0.0/systeminfo.schema.json @@ -35,9 +35,7 @@ }, "ips": { "type": "array", - "items": { - "type": "string" - }, + "items": {"type": "string"}, "description": "List of local IPs" }, "totalRAM": { From a6a6bb530bab761ccff325af1c6ccd37a9fca60b Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Thu, 6 Jan 2022 08:14:19 +0100 Subject: [PATCH 20/26] Makes linter happy --- report/validator.go | 1 + 1 file changed, 1 insertion(+) diff --git a/report/validator.go b/report/validator.go index 5c8a79a..15d19c8 100644 --- a/report/validator.go +++ b/report/validator.go @@ -10,6 +10,7 @@ import ( "strings" jsonschema "github.com/santhosh-tekuri/jsonschema/v5" + // Enable HTTP loading of schemas by default _ "github.com/santhosh-tekuri/jsonschema/v5/httploader" ) From a9150db2a1d37c769d68cd21920153f13409c257 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Thu, 6 Jan 2022 08:20:11 +0100 Subject: [PATCH 21/26] Makes md5 and sha256 lowercase in JSON --- fileio/file.go | 4 ++-- output/filtering.go | 4 ++-- report/report.go | 15 ++++++++++----- report/v1.0.0/file-scans.schema.json | 10 +++++----- report/v1.0.0/processes.schema.json | 10 +++++----- 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/fileio/file.go b/fileio/file.go index 0f0cc35..fa58c2a 100644 --- a/fileio/file.go +++ b/fileio/file.go @@ -14,8 +14,8 @@ type File interface { type OSFile struct { FilePath string `json:"path"` - MD5Sum string `json:"MD5,omitempty"` - SHA256Sum string `json:"SHA256,omitempty"` + MD5Sum string `json:"md5,omitempty"` + SHA256Sum string 
`json:"sha256,omitempty"` } func NewFile(path string) File { diff --git a/output/filtering.go b/output/filtering.go index 9647a62..df7d02a 100644 --- a/output/filtering.go +++ b/output/filtering.go @@ -348,8 +348,8 @@ func (f *AnonymizingFilter) FilterFSScanProgress(scan *fileio.FSScanProgress) *f type AnonymizedFile struct { FilePath string `json:"path"` - MD5Sum string `json:"MD5,omitempty"` - SHA256Sum string `json:"SHA256,omitempty"` + MD5Sum string `json:"md5,omitempty"` + SHA256Sum string `json:"sha256,omitempty"` origFile fileio.File } diff --git a/report/report.go b/report/report.go index e2fa022..9bedf0c 100644 --- a/report/report.go +++ b/report/report.go @@ -2,7 +2,6 @@ package report import ( "github.com/fkie-cad/yapscan/arch" - "github.com/fkie-cad/yapscan/fileio" "github.com/fkie-cad/yapscan/procio" ) @@ -88,7 +87,7 @@ type MemorySegmentInfo struct { // File contains the path to the mapped file, or empty string if // no file mapping is associated with this memory segment. - MappedFile *fileio.OSFile `json:"mappedFile"` + MappedFile *File `json:"mappedFile"` } // SystemInfo contains information about the running system. @@ -115,9 +114,9 @@ type MemoryScan struct { // FileScan represents all matches on a file. type FileScan struct { - File *fileio.OSFile `json:"file"` - Matches []*Match `json:"match"` - Error interface{} `json:"error"` + File *File `json:"file"` + Matches []*Match `json:"match"` + Error interface{} `json:"error"` } // Match represents the match of a yara Rule. 
@@ -133,3 +132,9 @@ type MatchString struct { Base uint64 `json:"base"` Offset uint64 `json:"offset"` } + +type File struct { + FilePath string `json:"path"` + MD5Sum string `json:"md5,omitempty"` + SHA256Sum string `json:"sha256,omitempty"` +} diff --git a/report/v1.0.0/file-scans.schema.json b/report/v1.0.0/file-scans.schema.json index e08203e..19bd645 100644 --- a/report/v1.0.0/file-scans.schema.json +++ b/report/v1.0.0/file-scans.schema.json @@ -9,17 +9,17 @@ "type": "object", "description": "Information about the scanned file", "properties": { - "SHA256": { - "type": "string", - "description": "SHA256 hexdigest of the executable file, if the file could be read" - }, "path": { "type": "string", "description": "The path of the scanned file" }, - "MD5": { + "md5": { "type": "string", "description": "MD5 hexdigest of the executable file, if the file could be read" + }, + "sha256": { + "type": "string", + "description": "SHA256 hexdigest of the executable file, if the file could be read" } }, "required": ["path"], diff --git a/report/v1.0.0/processes.schema.json b/report/v1.0.0/processes.schema.json index 0bb23b8..c883b75 100644 --- a/report/v1.0.0/processes.schema.json +++ b/report/v1.0.0/processes.schema.json @@ -83,17 +83,17 @@ { "type": "object", "properties": { - "SHA256": { - "type": "string", - "description": "SHA256 hexdigest of the mapped file, if the file could be read" - }, "path": { "type": "string", "description": "The path of the mapped file" }, - "MD5": { + "md5": { "type": "string", "description": "MD5 hexdigest of the mapped file, if the file could be read" + }, + "sha256": { + "type": "string", + "description": "SHA256 hexdigest of the mapped file, if the file could be read" } }, "required": ["path"], From 48f7925c39141b287b2ce076aec010cc4d02277b Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Thu, 6 Jan 2022 09:27:53 +0100 Subject: [PATCH 22/26] Extracts datetime regex --- report/v1.0.0/datetime.schema.json | 8 ++++++++ 
report/v1.0.0/stats.schema.json | 9 +++------ 2 files changed, 11 insertions(+), 6 deletions(-) create mode 100644 report/v1.0.0/datetime.schema.json diff --git a/report/v1.0.0/datetime.schema.json b/report/v1.0.0/datetime.schema.json new file mode 100644 index 0000000..084fcd1 --- /dev/null +++ b/report/v1.0.0/datetime.schema.json @@ -0,0 +1,8 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://yapscan.targodan.de/reportFormat/v1.0.0/datetime.schema.json", + "title": "datetime", + "description": "Datetime in RFC3339 with timezone and added micro seconds.", + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}Z[+-]\\d{2}:\\d{2}$" +} \ No newline at end of file diff --git a/report/v1.0.0/stats.schema.json b/report/v1.0.0/stats.schema.json index 1533aea..f1329ad 100644 --- a/report/v1.0.0/stats.schema.json +++ b/report/v1.0.0/stats.schema.json @@ -6,13 +6,11 @@ "type": "object", "properties": { "start": { - "type": "string", - "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}Z[+-]\\d{2}:\\d{2}$", + "$ref": "https://yapscan.targodan.de/reportFormat/v1.0.0/datetime.schema.json", "description": "Datetime of the start of the scan. Format is RFC3339 with added micro seconds." }, "end": { - "type": "string", - "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}Z[+-]\\d{2}:\\d{2}$", + "$ref": "https://yapscan.targodan.de/reportFormat/v1.0.0/datetime.schema.json", "description": "Datetime of the start of the scan. Format is RFC3339 with added micro seconds." }, "profilingInformation": { @@ -46,8 +44,7 @@ "maximum": 1.0 }, "time": { - "type": "string", - "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{6}Z[+-]\\d{2}:\\d{2}$", + "$ref": "https://yapscan.targodan.de/reportFormat/v1.0.0/datetime.schema.json", "description": "Datetime of the stats snapshot. Format is RFC3339 with added micro seconds." 
} }, From 77f1ed842c38394c94f6979e8ce7af1fa9bed0d4 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Thu, 6 Jan 2022 09:43:33 +0100 Subject: [PATCH 23/26] Fixes load average schema Load average can exceed 1.0 on linux, meaning the system is overloaded. --- report/v1.0.0/stats.schema.json | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/report/v1.0.0/stats.schema.json b/report/v1.0.0/stats.schema.json index f1329ad..40c426a 100644 --- a/report/v1.0.0/stats.schema.json +++ b/report/v1.0.0/stats.schema.json @@ -27,21 +27,18 @@ }, "loadAvgOneMinute": { "type": "number", - "description": "The load average over the last minute, normalized over the number of CPUs. Note, that on windows load checking start with the scan, thus this value will be inaccurate for the first minute of the scan.", - "minimum": 0.0, - "maximum": 1.0 + "description": "The load average over the last minute, normalized over the number of CPUs, i.e. a value of 1.0 means the system is fully loaded. On linux this value can exceed 1.0, meaning processes are waiting for CPU time. Note, that on windows load checking start with the scan, thus this value will be inaccurate for the first minute of the scan.", + "minimum": 0.0 }, "loadAvgFiveMinutes": { "type": "number", - "description": "The load average over the last five minutes, normalized over the number of CPUs. Note, that on windows load checking start with the scan, thus this value will be inaccurate for the first 5 minutes of the scan.", - "minimum": 0.0, - "maximum": 1.0 + "description": "The load average over the last five minutes, normalized over the number of CPUs, i.e. a value of 1.0 means the system is fully loaded. On linux this value can exceed 1.0, meaning processes are waiting for CPU time. 
Note, that on windows load checking start with the scan, thus this value will be inaccurate for the first 5 minutes of the scan.", + "minimum": 0.0 }, "loadAvgFifteenMinutes": { "type": "number", - "description": "The load average over the last fifteen minutes, normalized over the number of CPUs. Note, that on windows load checking start with the scan, thus this value will be inaccurate for the first 15 minutes of the scan.", - "minimum": 0.0, - "maximum": 1.0 + "description": "The load average over the last fifteen minutes, normalized over the number of CPUs, i.e. a value of 1.0 means the system is fully loaded. On linux this value can exceed 1.0, meaning processes are waiting for CPU time. Note, that on windows load checking start with the scan, thus this value will be inaccurate for the first 15 minutes of the scan.", + "minimum": 0.0 }, "time": { "$ref": "https://yapscan.targodan.de/reportFormat/v1.0.0/datetime.schema.json", From 77089915446470e7bde66e7f3749762885e3e1f9 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Thu, 6 Jan 2022 10:33:23 +0100 Subject: [PATCH 24/26] Adds generated schema documentation --- report/v1.0.0/README.md | 14 +- report/v1.0.0/datetime.schema.html | 1 + report/v1.0.0/file-scans.schema.html | 1 + report/v1.0.0/match.schema.html | 1 + report/v1.0.0/memory-scans.schema.html | 1 + report/v1.0.0/meta.schema.html | 1 + report/v1.0.0/permissions.schema.html | 1 + report/v1.0.0/processes.schema.html | 1 + report/v1.0.0/schema_doc.css | 180 +++++++++++++++++++++++++ report/v1.0.0/schema_doc.min.js | 1 + report/v1.0.0/stats.schema.html | 1 + report/v1.0.0/systeminfo.schema.html | 1 + 12 files changed, 197 insertions(+), 7 deletions(-) create mode 100644 report/v1.0.0/datetime.schema.html create mode 100644 report/v1.0.0/file-scans.schema.html create mode 100644 report/v1.0.0/match.schema.html create mode 100644 report/v1.0.0/memory-scans.schema.html create mode 100644 report/v1.0.0/meta.schema.html create mode 100644 
report/v1.0.0/permissions.schema.html create mode 100644 report/v1.0.0/processes.schema.html create mode 100644 report/v1.0.0/schema_doc.css create mode 100644 report/v1.0.0/schema_doc.min.js create mode 100644 report/v1.0.0/stats.schema.html create mode 100644 report/v1.0.0/systeminfo.schema.html diff --git a/report/v1.0.0/README.md b/report/v1.0.0/README.md index e12469a..20b1c49 100644 --- a/report/v1.0.0/README.md +++ b/report/v1.0.0/README.md @@ -38,29 +38,29 @@ The only exception from this is the `meta.schema.json`, which is more lax to all This file contains meta information about the report. The `meta.json` has stricter promises regarding compatibility than the other files, as it is essential for parser implementations. -The `meta.json` will validate correctly against the [meta.schema.json of version 1.0.0](https://yapscan.targodan.de/reportFormat/1.0.0/meta.schema.json) for **any update except a MAJOR-Update**. +The `meta.json` will validate correctly against the [meta.schema.json of version 1.0.0](https://yapscan.targodan.de/reportFormat/v1.0.0/meta.schema.json) for **any update except a MAJOR-Update**. This means only the addition of fields to this file is allowed, not removal, renaming or changing of contents. -Latest Schema: [meta.schema.json](https://yapscan.targodan.de/reportFormat/latest/meta.schema.json) +Latest Schema: [meta.schema.json](https://yapscan.targodan.de/reportFormat/v1.0.0/meta.schema.json) / [meta.schema.html](https://yapscan.targodan.de/reportFormat/v1.0.0/meta.schema.html) ### stats.json This file contains statistic information about the scan. -Latest Schema: [stats.schema.json](https://yapscan.targodan.de/reportFormat/latest/stats.schema.json) +Latest Schema: [stats.schema.json](https://yapscan.targodan.de/reportFormat/v1.0.0/stats.schema.json) / [stats.schema.html](https://yapscan.targodan.de/reportFormat/v1.0.0/stats.schema.html) ### systeminfo.json This file contains information about the scanned system. 
-Latest Schema: [systeminfo.schema.json](https://yapscan.targodan.de/reportFormat/latest/systeminfo.schema.json) +Latest Schema: [systeminfo.schema.json](https://yapscan.targodan.de/reportFormat/v1.0.0/systeminfo.schema.json) / [systeminfo.schema.html](https://yapscan.targodan.de/reportFormat/v1.0.0/systeminfo.schema.html) ### processes.json This file contains information about the scanned processes and their memory layouts. There is one JSON-Object per line in this file (splitting on `'\n'` is safe). -Latest Schema: [processes.schema.json](https://yapscan.targodan.de/reportFormat/latest/processes.schema.json) +Latest Schema: [processes.schema.json](https://yapscan.targodan.de/reportFormat/v1.0.0/processes.schema.json) / [processes.schema.html](https://yapscan.targodan.de/reportFormat/v1.0.0/processes.schema.html) ### memory-scans.json @@ -68,7 +68,7 @@ This file contains information about the scanned memory segments and any related There is one JSON-Object per line in this file (splitting on `'\n'` is safe). It may be omitted if no memory was scanned. -Latest Schema: [memory-scans.schema.json](https://yapscan.targodan.de/reportFormat/latest/memory-scans.schema.json) +Latest Schema: [memory-scans.schema.json](https://yapscan.targodan.de/reportFormat/v1.0.0/memory-scans.schema.json) / [memory-scans.schema.html](https://yapscan.targodan.de/reportFormat/v1.0.0/memory-scans.schema.html) ### file-scans.json @@ -76,4 +76,4 @@ This file contains information about the scanned files and any related yara rule There is one JSON-Object per line in this file (splitting on `'\n'` is safe). It may be omitted if no files were scanned. 
-Latest Schema: [file-scans.schema.json](https://yapscan.targodan.de/reportFormat/latest/file-scans.schema.json) +Latest Schema: [file-scans.schema.json](https://yapscan.targodan.de/reportFormat/v1.0.0/file-scans.schema.json) / [file-scans.schema.html](https://yapscan.targodan.de/reportFormat/v1.0.0/file-scans.schema.html) diff --git a/report/v1.0.0/datetime.schema.html b/report/v1.0.0/datetime.schema.html new file mode 100644 index 0000000..2e878c1 --- /dev/null +++ b/report/v1.0.0/datetime.schema.html @@ -0,0 +1 @@ + datetime

datetime

Type: string

Datetime in RFC3339 with timezone and added micro seconds.

Must match regular expression: ^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{6}Z[+-]\d{2}:\d{2}$ \ No newline at end of file diff --git a/report/v1.0.0/file-scans.schema.html b/report/v1.0.0/file-scans.schema.html new file mode 100644 index 0000000..6ef2c5d --- /dev/null +++ b/report/v1.0.0/file-scans.schema.html @@ -0,0 +1 @@ + file-scans

file-scans

Type: object

Scan results of file-scans. For each scanned file, where either a match was found or an error was emitted, one JSON object per line is stored.

Type: object

Information about the scanned file

No Additional Properties

Type: string

The path of the scanned file

Type: string

MD5 hexdigest of the scanned file, if the file could be read

Type: string

SHA256 hexdigest of the scanned file, if the file could be read

Type: array

Contains information about matched rules. Is an empty array if no rules matched.

Each item of this array must be:

Type: object

Information about a yara rule match

No Additional Properties

Type: array

The exact strings of the yara rule, that were found, including their offsets in the memory segment.

Each item of this array must be:

Type: object
No Additional Properties

Type: number

The offset, where the string was found, relative to the start of the scanned memory segment or file. Note that this value can get very large; make sure your parser uses an int64.

Type: string

The name of the matched string as defined in the yara rule

Type: number

Type: string

The namespace of the matched yara rule. This depends on how the rules were compiled.

Type: string

The name of the matched yara rule

Type: string or null

The error message or null if no error happened. Note, there may still be matches if an error happened.

\ No newline at end of file diff --git a/report/v1.0.0/match.schema.html b/report/v1.0.0/match.schema.html new file mode 100644 index 0000000..117e4dc --- /dev/null +++ b/report/v1.0.0/match.schema.html @@ -0,0 +1 @@ + match

match

Type: object

Information about a yara rule match

No Additional Properties

Type: array

The exact strings of the yara rule, that were found, including their offsets in the memory segment.

Each item of this array must be:

Type: object
No Additional Properties

Type: number

The offset, where the string was found, relative to the start of the scanned memory segment or file. Note that this value can get very large; make sure your parser uses an int64.

Type: string

The name of the matched string as defined in the yara rule

Type: number

Type: string

The namespace of the matched yara rule. This depends on how the rules were compiled.

Type: string

The name of the matched yara rule

\ No newline at end of file diff --git a/report/v1.0.0/memory-scans.schema.html b/report/v1.0.0/memory-scans.schema.html new file mode 100644 index 0000000..59cbcf0 --- /dev/null +++ b/report/v1.0.0/memory-scans.schema.html @@ -0,0 +1 @@ + memory-scans

memory-scans

Type: object

Memory scan results. For each scanned memory section, where either a match was found or an error was emitted, one JSON object per line is stored.

No Additional Properties

Type: array

Contains information about matched rules. Is an empty array if no rules matched.

Each item of this array must be:

Type: object

Information about a yara rule match

No Additional Properties

Type: array

The exact strings of the yara rule, that were found, including their offsets in the memory segment.

Each item of this array must be:

Type: object
No Additional Properties

Type: number

The offset, where the string was found, relative to the start of the scanned memory segment or file. Note that this value can get very large; make sure your parser uses an int64.

Type: string

The name of the matched string as defined in the yara rule

Type: number

Type: string

The namespace of the matched yara rule. This depends on how the rules were compiled.

Type: string

The name of the matched yara rule

Type: integer

PID of the scanned process

Type: string or null

The error message or null if no error happened. Note, there may still be matches if an error happened.

Type: integer

The base address of the scanned memory segment. This can be used to resolve the memory segment information in the processes.json. Note that this value can get very large; make sure your parser uses an int64.

\ No newline at end of file diff --git a/report/v1.0.0/meta.schema.html b/report/v1.0.0/meta.schema.html new file mode 100644 index 0000000..2c33800 --- /dev/null +++ b/report/v1.0.0/meta.schema.html @@ -0,0 +1 @@ + meta

meta

Type: object

Metainformation about the yapscan report.

Type: string

Version of Yapscan, used to generate the report

Type: object

SchemaURLs for the files of the report. There is one schema link for each JSON file contained in the report.

All properties whose names match the following regular expression must respect the following conditions

Property name regular expression: \.json$
Type: string

Type: string

Version of the report format

\ No newline at end of file diff --git a/report/v1.0.0/permissions.schema.html b/report/v1.0.0/permissions.schema.html new file mode 100644 index 0000000..14f030d --- /dev/null +++ b/report/v1.0.0/permissions.schema.html @@ -0,0 +1 @@ + permissions

permissions

Type: object

Permissions of a memory segment

No Additional Properties

Type: boolean

True, if readable

Type: boolean

True, if writable. If COW is true, this will be as well.

Type: boolean

True, if the Copy-On-Write flag is set.

Type: boolean

True, if executable

\ No newline at end of file diff --git a/report/v1.0.0/processes.schema.html b/report/v1.0.0/processes.schema.html new file mode 100644 index 0000000..beff71b --- /dev/null +++ b/report/v1.0.0/processes.schema.html @@ -0,0 +1 @@ + processes

processes

Type: object

Information about running processes of a scanned system

No Additional Properties

Type: integer

PID of the process

Type: enum (of string)

Bitness of the process

Must be one of:

  • "invalid"
  • "64Bit"
  • "32Bit"

Type: string or null

Path to the executable file of the process, if it could be determined


MD5 hexdigest of the executable file, if the file could be read

Type: string
Must match regular expression: ^[a-f0-9]{32}$
Type: null


SHA256 hexdigest of the executable file, if the file could be read

Type: string
Must match regular expression: ^[a-f0-9]{64}$
Type: null

Type: string

Name of the user under which the process is executed

Type: array of object

Each item of this array must be:

Type: object
No Additional Properties

Type: integer

Address of the parent segment. This is equal to baseAddress if the segment is a root segment. Note that this value can get very large; make sure your parser uses an int64.

Type: integer

Address of this segment. Note that this value can get very large; make sure your parser uses an int64.

Type: object

The permissions this segment was initialized with

No Additional Properties

Type: boolean

True, if readable

Type: boolean

True, if writable. If COW is true, this will be as well.

Type: boolean

True, if the Copy-On-Write flag is set.

Type: boolean

True, if executable

Type: object

The permissions this segment had at the time of the scan

Same definition as allocatedPermissions

Type: integer

Size of the segment in bytes. Note that this value can get very large; make sure your parser uses an int64.

Type: integer

The resident set size (RSS) of the segment in bytes. Only applicable on linux. Note that this value can get very large; make sure your parser uses an int64.

Type: enum (of string)

The state of the segment. Note that the state "reserve" is an approximation on linux; this will be set if the RSS is exactly zero.

Must be one of:

  • "commit"
  • "free"
  • "reserve"

Type: enum (of string)

The type of the memory segment

Must be one of:

  • "image"
  • "mapped"
  • "private"
  • "privateMapped"


Type: object
No Additional Properties

Type: string

The path of the mapped file

Type: string

MD5 hexdigest of the mapped file, if the file could be read

Type: string

SHA256 hexdigest of the mapped file, if the file could be read

\ No newline at end of file diff --git a/report/v1.0.0/schema_doc.css b/report/v1.0.0/schema_doc.css new file mode 100644 index 0000000..83897d8 --- /dev/null +++ b/report/v1.0.0/schema_doc.css @@ -0,0 +1,180 @@ +body { + font: 16px/1.5em "Overpass", "Open Sans", Helvetica, sans-serif; + color: #333; + font-weight: 300; + padding: 40px; +} + +.btn.btn-link { + font-size: 18px; +} + +.jsfh-animated-property { + animation: eclair; + animation-iteration-count: 1; + animation-fill-mode: forwards; + animation-duration: .75s; + +} + +@keyframes eclair { + 0%,100% { + transform: scale(1); + } + 50% { + transform: scale(1.03); + } +} + +.btn.btn-primary { + margin: 10px; +} + +.btn.example-show.collapsed:before { + content: "show" +} + +.btn.example-show:before { + content: "hide" +} + +.description.collapse:not(.show) { + max-height: 100px !important; + overflow: hidden; + + display: -webkit-box; + -webkit-line-clamp: 2; + -webkit-box-orient: vertical; +} + +.description.collapsing { + min-height: 100px !important; +} + +.collapse-description-link.collapsed:after { + content: '+ Read More'; +} + +.collapse-description-link:not(.collapsed):after { + content: '- Read Less'; +} + +.badge { + font-size: 100%; + margin-bottom: 0.5rem; + margin-top: 0.5rem; +} + +.badge.value-type { + font-size: 120%; + margin-right: 5px; + margin-bottom: 10px; +} + + +.badge.default-value { + font-size: 120%; + margin-left: 5px; + margin-bottom: 10px; +} + +.badge.restriction { + display: inline-block; +} + +.badge.required-property,.badge.deprecated-property,.badge.pattern-property,.badge.no-additional { + font-size: 100%; + margin-left: 10px; +} + +.accordion div.card:only-child { + border-bottom: 1px solid rgba(0, 0, 0, 0.125); +} + +.examples { + padding: 1rem !important; +} + +.examples pre { + margin-bottom: 0; +} + +.highlight.jumbotron { + padding: 1rem !important; +} + +.generated-by-footer { + margin-top: 1em; + text-align: right; +} + +/* From 
https://github.com/richleland/pygments-css/blob/master/friendly.css, see https://github.com/trentm/python-markdown2/wiki/fenced-code-blocks */ +.highlight { background: #e9ecef; } /* Changed from #f0f0f0 in the original style to be the same as bootstrap's jumbotron */ +.highlight .hll { background-color: #ffffcc } +.highlight .c { color: #60a0b0; font-style: italic } /* Comment */ +.highlight .err { border: 1px solid #FF0000 } /* Error */ +.highlight .k { color: #007020; font-weight: bold } /* Keyword */ +.highlight .o { color: #666666 } /* Operator */ +.highlight .ch { color: #60a0b0; font-style: italic } /* Comment.Hashbang */ +.highlight .cm { color: #60a0b0; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #007020 } /* Comment.Preproc */ +.highlight .cpf { color: #60a0b0; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #60a0b0; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #60a0b0; background-color: #fff0f0 } /* Comment.Special */ +.highlight .gd { color: #A00000 } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .gr { color: #FF0000 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #888888 } /* Generic.Output */ +.highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #0044DD } /* Generic.Traceback */ +.highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #007020 } /* Keyword.Pseudo */ +.highlight .kr { color: #007020; font-weight: bold } /* 
Keyword.Reserved */ +.highlight .kt { color: #902000 } /* Keyword.Type */ +.highlight .m { color: #40a070 } /* Literal.Number */ +.highlight .s { color: #4070a0 } /* Literal.String */ +.highlight .na { color: #4070a0 } /* Name.Attribute */ +.highlight .nb { color: #007020 } /* Name.Builtin */ +.highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ +.highlight .no { color: #60add5 } /* Name.Constant */ +.highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ +.highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */ +.highlight .ne { color: #007020 } /* Name.Exception */ +.highlight .nf { color: #06287e } /* Name.Function */ +.highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ +.highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ +.highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #bb60d5 } /* Name.Variable */ +.highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mb { color: #40a070 } /* Literal.Number.Bin */ +.highlight .mf { color: #40a070 } /* Literal.Number.Float */ +.highlight .mh { color: #40a070 } /* Literal.Number.Hex */ +.highlight .mi { color: #40a070 } /* Literal.Number.Integer */ +.highlight .mo { color: #40a070 } /* Literal.Number.Oct */ +.highlight .sa { color: #4070a0 } /* Literal.String.Affix */ +.highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ +.highlight .sc { color: #4070a0 } /* Literal.String.Char */ +.highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */ +.highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #4070a0 } /* Literal.String.Double */ +.highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ +.highlight .si { color: #70a0d0; font-style: italic } /* 
Literal.String.Interpol */ +.highlight .sx { color: #c65d09 } /* Literal.String.Other */ +.highlight .sr { color: #235388 } /* Literal.String.Regex */ +.highlight .s1 { color: #4070a0 } /* Literal.String.Single */ +.highlight .ss { color: #517918 } /* Literal.String.Symbol */ +.highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #06287e } /* Name.Function.Magic */ +.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ +.highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ +.highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ +.highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */ +.highlight .il { color: #40a070 } /* Literal.Number.Integer.Long */ \ No newline at end of file diff --git a/report/v1.0.0/schema_doc.min.js b/report/v1.0.0/schema_doc.min.js new file mode 100644 index 0000000..0d9c788 --- /dev/null +++ b/report/v1.0.0/schema_doc.min.js @@ -0,0 +1 @@ +function flashElement(t){myElement=document.getElementById(t),myElement.classList.add("jsfh-animated-property"),setTimeout(function(){myElement.classList.remove("jsfh-animated-property")},1e3)}function setAnchor(t){history.pushState({},"",t)}function anchorOnLoad(){let t=window.location.hash.split("?")[0].split("&")[0];"#"===t[0]&&(t=t.substr(1)),t.length>0&&anchorLink(t)}function anchorLink(t){$("#"+t).parents().addBack().filter(".collapse:not(.show), .tab-pane, [role='tab']").each(function(t){if($(this).hasClass("collapse"))$(this).collapse("show");else if($(this).hasClass("tab-pane")){const t=$("a[href='#"+$(this).attr("id")+"']");t&&t.tab("show")}else"tab"===$(this).attr("role")&&$(this).tab("show")}),setTimeout(function(){let e=document.getElementById(t);e&&(e.scrollIntoView({block:"center",behavior:"smooth"}),setTimeout(function(){flashElement(t)},500))},1e3)}$(document).on("click",'a[href^="#"]',function(t){t.preventDefault(),history.pushState({},"",this.href)}); \ No newline at end of file diff --git a/report/v1.0.0/stats.schema.html 
b/report/v1.0.0/stats.schema.html new file mode 100644 index 0000000..31ede2d --- /dev/null +++ b/report/v1.0.0/stats.schema.html @@ -0,0 +1 @@ + stats

stats

Type: object

Statistic information about the Yapscan run

No Additional Properties

Type: string

Datetime of the start of the scan. Format is RFC3339 with added micro seconds.

Must match regular expression: ^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{6}Z[+-]\d{2}:\d{2}$

Type: string

Datetime of the end of the scan. Format is RFC3339 with added micro seconds.

Same definition as start

Type: array

Each item of this array must be:

Type: object
No Additional Properties

Type: integer

The currently free RAM in bytes. Note that this value can get very large; make sure your parser uses an int64.

Type: integer

The currently free swap in bytes. Note that this value can get very large; make sure your parser uses an int64.

Type: number

The load average over the last minute, normalized over the number of CPUs, i.e. a value of 1.0 means the system is fully loaded. On Linux this value can exceed 1.0, meaning processes are waiting for CPU time. Note that on Windows load checking starts with the scan, thus this value will be inaccurate for the first minute of the scan.

Value must be greater or equal to 0.0

Type: number

The load average over the last five minutes, normalized over the number of CPUs, i.e. a value of 1.0 means the system is fully loaded. On Linux this value can exceed 1.0, meaning processes are waiting for CPU time. Note that on Windows load checking starts with the scan, thus this value will be inaccurate for the first 5 minutes of the scan.

Value must be greater or equal to 0.0

Type: number

The load average over the last fifteen minutes, normalized over the number of CPUs, i.e. a value of 1.0 means the system is fully loaded. On Linux this value can exceed 1.0, meaning processes are waiting for CPU time. Note that on Windows load checking starts with the scan, thus this value will be inaccurate for the first 15 minutes of the scan.

Value must be greater or equal to 0.0

Type: string

Datetime of the stats snapshot. Format is RFC3339 with added micro seconds.

Same definition as start

Type: number

The number of scanned processes

Type: number

The number of scanned memory segments

Type: number

Number of total bytes scanned from memory. Note that this value can get very large; make sure your parser uses an int64.

Type: number

The number of scanned files

Type: integer

Number of total bytes scanned from files. Note that this value can get very large; make sure your parser uses an int64.

\ No newline at end of file diff --git a/report/v1.0.0/systeminfo.schema.html b/report/v1.0.0/systeminfo.schema.html new file mode 100644 index 0000000..e6b11c3 --- /dev/null +++ b/report/v1.0.0/systeminfo.schema.html @@ -0,0 +1 @@ + Yapscan Systeminfo

Yapscan Systeminfo

Type: object

System information gathered by Yapscan

No Additional Properties

Type: integer

Total swap capacity in bytes. Note that this value can get very large; make sure your parser uses an int64.

Type: string

The hostname of the scanned system

Type: string

Operating system version

Type: integer

Total number of CPUs/Cores

Type: enum (of string)

The operating system's architecture

Must be one of:

  • "invalid"
  • "amd64"
  • "i386"

Type: string

Name of the operating system

Type: string

Flavour of the operating system

Type: array of string

List of local IPs

Each item of this array must be:

Type: string

Type: integer

Total installed RAM in bytes. Note that this value can get very large; make sure your parser uses an int64.

\ No newline at end of file From f4af9a2d8a38d6e76f04a52543fb65cff6c3353e Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Thu, 6 Jan 2022 10:36:37 +0100 Subject: [PATCH 25/26] Updates README with link to report format --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 6ad217f..e9b55f4 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ Yapscan is a **YA**ra based **P**rocess **SCAN**ner, aimed at giving more control about what to scan and giving detailed reports on matches. +**The report format is now versioned and a stable version 1.0.0 is released with compatibility guarantees, see the [report format documentation](report/v1.0.0/README.md).** + ## Features You can use yapscan to selectively scan the memory of running processes as well as files in local hard drives and/or mounted shares. From 98fafdcac2665be65df956232a62dd2045c535e1 Mon Sep 17 00:00:00 2001 From: Luca Corbatto Date: Thu, 6 Jan 2022 10:44:44 +0100 Subject: [PATCH 26/26] Bumps version --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index f8199e9..b510791 100644 --- a/version/version.go +++ b/version/version.go @@ -9,7 +9,7 @@ import ( var YapscanVersion = Version{ Major: 0, - Minor: 12, + Minor: 13, Bugfix: 0, }