Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Списки модераторов и ведущих вынесены в конфиг #13

Merged
merged 2 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/parser/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func main() {
time.Sleep(100 * time.Millisecond)
log.Printf("Task %v processed\n", taskID.String())

entry := questionanswer.NewEntry(taskID)
entry := questionanswer.NewEntry(taskID, cfg)

client := httpclient.New(nil)
err := entry.FetchData(client)
Expand Down
2 changes: 1 addition & 1 deletion cmd/service/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func main() {
time.Sleep(100 * time.Millisecond)
log.Printf("Task %v processed\n", taskID.String())

entry := questionanswer.NewEntry(taskID)
entry := questionanswer.NewEntry(taskID, cfg)

client := httpclient.New(nil)
err := entry.FetchData(client)
Expand Down
15 changes: 15 additions & 0 deletions config/parser/local.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,21 @@ manticore:
- index: questions
- index: questions_ext
entry_chan_buffer: 20
questionanswer:
moderator:
- "Ведущий"
- "Ведущая:"
- "Дмитрий Таран:"
- "Сергей Будков:"
- "ВедущийЯ:"
- "Айнис Казимирович Петкус"
responsible:
- "Валерий Викторович Пякин:"
- "Валерий Викторович"
- "Пякин Валерий Викторович"
- "В.В. Пякин:"
- "Валерий"
- "Викторович Валерий Пякин"
splitter:
opt_chunk_size: 1800 # оптимальный размер фрагмента контента для поиска, на эти фрагменты будет разбит контент
max_chunk_size: 3600 # максимальный размер фрагмента контента для поиска
Expand Down
6 changes: 6 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ type Config struct {
RandomDelay *time.Duration `yaml:"random_delay" env-default:"150s"`
ManticoreIndex []Index `yaml:"manticore"`
EntryChanBuffer int `yaml:"entry_chan_buffer" env-default:"20"`
Questionanswer Questionanswer `yaml:"questionanswer"`
Splitter Splitter `yaml:"splitter"`
Parsers []Parser `yaml:"parsers"`
}
Expand All @@ -23,6 +24,11 @@ type Index struct {
Name string `yaml:"index"`
}

// Questionanswer is the "questionanswer" section of the YAML config. It holds
// the speaker labels that questionanswer.NewEntry attaches to every Entry; when
// a list is empty NewEntry substitutes its built-in default list.
type Questionanswer struct {
// Moderator lists the labels searched for in paragraph text to recognise the
// host/moderator, i.e. the start of a question (for example "Ведущий").
Moderator []string `yaml:"moderator"`
// Responsible lists the labels searched for to recognise the answering
// speaker (for example "Валерий Викторович Пякин:").
Responsible []string `yaml:"responsible"`
}

type Parser struct {
Url string `yaml:"url"`
Current bool `yaml:"current" env-default:"true"`
Expand Down
58 changes: 37 additions & 21 deletions internal/qaparser/questionanswer/questionanswer.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,22 @@ import (
"unicode/utf8"

"github.com/PuerkitoBio/goquery"
"github.com/terratensor/svodd-server/internal/config"
"github.com/terratensor/svodd-server/internal/lib/httpclient"
"golang.org/x/net/html"
)

// Entry describes one page addressed by Url together with the data extracted
// from it. NewEntry fills only Url and the two label lists.
// NOTE(review): the remaining fields are presumably populated by FetchData and
// the splitting methods — confirm against the rest of the file.
type Entry struct {
	Url       *url.URL
	Title     string
	Video     *url.URL
	Datetime  *time.Time
	Content   []QuestionAnswer
	Fragments []Fragment
	Comments  []Comment
	Html      string

	// moderator and responsible are the speaker labels matched against the
	// text (via checkStrIndex) when it is split into questions and answers.
	// They are unexported and set once by NewEntry.
	moderator   []string
	responsible []string
}

type Fragment struct {
Expand All @@ -48,9 +51,22 @@ type Comment struct {

const TypeComment = 3

func NewEntry(url *url.URL) *Entry {
// Fallback speaker labels. NewEntry uses them when the corresponding list in
// the config's "questionanswer" section is empty.
var (
	// defaultModerator: labels that identify the host asking a question.
	defaultModerator = []string{
		"Ведущий",
		"Ведущая:",
		"Дмитрий Таран:",
		"Сергей Будков:",
		"ВедущийЯ:",
		"Айнис Казимирович Петкус",
	}

	// defaultResponsible: labels that identify the speaker giving the answer.
	defaultResponsible = []string{
		"Валерий Викторович Пякин:",
		"Валерий Викторович",
		"Пякин Валерий Викторович",
		"В.В. Пякин:",
		"Валерий",
	}
)

// NewEntry creates an Entry for the given page URL and attaches the speaker
// labels that are matched against the text when it is split into questions
// and answers.
//
// The labels are taken from cfg.Questionanswer. If cfg is nil, or one of its
// lists is empty, the package defaults (defaultModerator, defaultResponsible)
// are used instead, so a config without a "questionanswer" section still works.
func NewEntry(url *url.URL, cfg *config.Config) *Entry {
	moderator, responsible := defaultModerator, defaultResponsible
	if cfg != nil {
		// Only a non-empty configured list overrides the defaults.
		if len(cfg.Questionanswer.Moderator) > 0 {
			moderator = cfg.Questionanswer.Moderator
		}
		if len(cfg.Questionanswer.Responsible) > 0 {
			responsible = cfg.Questionanswer.Responsible
		}
	}
	return &Entry{
		Url:         url,
		moderator:   moderator,
		responsible: responsible,
	}
}

Expand Down Expand Up @@ -146,9 +162,6 @@ func parseAvatarFile(avatarFile string) *url.URL {
return u
}

var moderator = []string{"Ведущий", "Ведущая:", "Дмитрий Таран:", "Сергей Будков:", "ВедущийЯ:", "Айнис Казимирович Петкус"}
var responsible = []string{"Валерий Викторович Пякин:", "Валерий Викторович", "Пякин Валерий Викторович", "В.В. Пякин:", "Валерий"}

// SplitIntoChunks разбивает текст на вопросы и ответы.
// Он основан на поиске конкретных строк в тексте.
// Если нашелся текст "Ведущий:", то он начинает добавлять текст в массив вопросов.
Expand All @@ -173,9 +186,9 @@ func (e *Entry) SplitIntoChunks(els *goquery.Selection) {
text := strings.TrimSpace(s.Text())

// Мы ищем текст "Ведущий:".
moderatorIndex, curModerator := checkStrIndex(text, moderator)
moderatorIndex, curModerator := checkStrIndex(text, e.moderator)
// Мы ищем текст "Валерий Викторович Пякин:".
responsibleIndex, curResponsible := checkStrIndex(text, responsible)
responsibleIndex, curResponsible := checkStrIndex(text, e.responsible)

// Если нашелся текст "Ведущий:", то мы начинаем новый вопрос.
if moderatorIndex == 0 {
Expand Down Expand Up @@ -253,14 +266,17 @@ func (e *Entry) splitAnswers() {
// If not, add the answer without any changes.
if startAnswer {
// Check if the answer starts with "Валерий Викторович Пякин:".
responsibleIndex, _ := checkStrIndex(ans, responsible)
responsibleIndex, _ := checkStrIndex(ans, e.responsible)
if responsibleIndex != 0 && strings.Index(ans, "<strong>") != 0 {
// Add a <strong> tag to the answer.
fragment.QuestionAnswer += fmt.Sprintf(
"<p class=\"answer\"><strong>Валерий Викторович: … </strong>%v</p>",
ans,
)
startAnswer = false
// Условие если ans не пустой, во избежание висящих … без текста, когда были вырезаны изображения из исходника.
if ans != "" {
fragment.QuestionAnswer += fmt.Sprintf(
"<p class=\"answer\"><strong>Валерий Викторович: … </strong>%v</p>",
ans,
)
startAnswer = false
}
} else {
// Add the answer without any changes.
fragment.QuestionAnswer += fmt.Sprintf("<p class=\"answer\">%v</p>", ans)
Expand Down
Loading