From 257f397d5ffe4a3c634db3b6dad074dc6658cee6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maty=C3=A1=C5=A1=20Kopp?=
Date: Fri, 29 Nov 2024 14:47:05 +0100
Subject: [PATCH 1/2] place derived formats in correct folders

---
Review notes (this area is ignored by git am):
 * Purpose: write each derived .txt / -meta.tsv file next to the XML file it
   was generated from instead of into the corpus root directory.
 * GNU parallel's {.} is the complete input path without its extension
   ({/.} is only the basename without extension). The paths piped in by
   find already begin with ${DATADIR}/ParlaMint-XX${CORPUSDIR_SUFFIX}, so the
   redirect target is simply {.}.txt or {.}-meta.tsv. Prefixing the corpus
   directory again would repeat it in the output path.
 * The "index" blob hashes below were not regenerated after this correction.
 * Tabs and blank context lines were reconstructed from a whitespace-collapsed
   copy of the patch - verify them against the target Makefile before applying.

 Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index b628544c2..9e6612666 100644
--- a/Makefile
+++ b/Makefile
@@ -356,7 +356,7 @@ text: $(text-XX)
 $(text-XX): text-%: %
 	rm -f `ls ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/ParlaMint-$<_*.txt | grep -v '.ana.'`
 	find -H ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX} -maxdepth 2 -type f -name "ParlaMint-$<_*.xml" | grep -v '.ana.' | $P --jobs 10 \
-	'$s -xsl:Scripts/parlamint-tei2text.xsl {} > ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/{/.}.txt'
+	'$s -xsl:Scripts/parlamint-tei2text.xsl {} > {.}.txt'
 
 text.ana-XX = $(addprefix text.ana-, $(PARLIAMENTS))
 ## text.ana ## create text version from TEI.ana files
@@ -365,7 +365,7 @@ text.ana: $(text.ana-XX)
 $(text.ana-XX): text.ana-%: %
 	rm -f ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/ParlaMint-$<_*.ana.txt
 	find -H ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX} -maxdepth 2 -type f -name "ParlaMint-$<_*.xml" | grep '.ana.' | $P --jobs 10 \
-	'$s -xsl:Scripts/parlamint-tei2text.xsl {} > ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/{/.}.txt'
+	'$s -xsl:Scripts/parlamint-tei2text.xsl {} > {.}.txt'
 
 
 
@@ -377,7 +377,7 @@ $(meta-XX): meta-%: %
 	rm -f ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/*-meta.tsv
 	find -H ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX} -maxdepth 2 -type f -name "ParlaMint-*_*.xml" | grep -v '.ana.' | $P --jobs 10 \
 	'$s meta=../${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/ParlaMint-$<.xml -xsl:Scripts/parlamint2meta.xsl \
-	{} > ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/{/.}-meta.tsv'
+	{} > {.}-meta.tsv'
 
 
 

From f14ec491ac14ee52eedde3911d622927ae7784ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maty=C3=A1=C5=A1=20Kopp?=
Date: Mon, 2 Dec 2024 10:10:15 +0100
Subject: [PATCH 2/2] polish segments to text conversion

---
Review notes (this area is ignored by git am):
 * text.seg-XX: recipe lines are silenced with @, the cleanup globs
   text.seg/ParlaMint-XX_*.seg.txt directly, outputs are now named
   {/.}.seg.txt, and a final INFO line reports the output directory.
 * text.seg.ana-XX: output moves to its own text.seg.ana directory. Files are
   first written as {/.} (basename minus .xml, i.e. still ending in .ana) and
   then renamed with 'mv {} {.}.seg.txt', where {.} drops that trailing .ana,
   so the final names end in .seg.txt.
 * The help comment for text.seg.ana is corrected from "## text.seg ##".
 * Tabs and blank context lines were reconstructed from a whitespace-collapsed
   copy of the patch - verify them against the target Makefile before applying.

 Makefile | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/Makefile b/Makefile
index 9e6612666..9496c4939 100644
--- a/Makefile
+++ b/Makefile
@@ -497,20 +497,25 @@ text.seg-XX = $(addprefix text.seg-, $(PARLIAMENTS))
 text.seg: $(text.seg-XX)
 ## text-XX ## convert TEI files to text
 $(text.seg-XX): text.seg-%: %
-	mkdir -p ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg
-	rm -f `ls ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg/ParlaMint-$<_*.seg.txt | grep -v '.ana.'`
-	find -H ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX} -maxdepth 2 -type f -name "ParlaMint-$<_*.xml" | grep -v '.ana.' | $P --jobs 10 \
-	'$s -xsl:Scripts/parlamint-tei2text.xsl element=seg {} > ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg/{/.}.txt'
+	@mkdir -p ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg
+	@rm -f ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg/ParlaMint-$<_*.seg.txt
+	@find -H ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX} -maxdepth 2 -type f -name "ParlaMint-$<_*.xml" | grep -v '.ana.' | $P --jobs 10 \
+	'$s -xsl:Scripts/parlamint-tei2text.xsl element=seg {} > ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg/{/.}.seg.txt'
+	@echo "INFO: segments converted to text are stored in ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg"
 
 text.seg.ana-XX = $(addprefix text.seg.ana-, $(PARLIAMENTS))
-## text.seg ## create text version from TEI.ana files - each line contains one segment
+## text.seg.ana ## create text version from TEI.ana files - each line contains one segment
 text.seg.ana: $(text.seg.ana-XX)
 ## text.seg.ana-XX ## convert TEI.seg.ana files to text
 $(text.seg.ana-XX): text.seg.ana-%: %
-	mkdir -p ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg
-	rm -f ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg/ParlaMint-$<_*.seg.ana.txt
-	find -H ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX} -maxdepth 2 -type f -name "ParlaMint-$<_*.xml" | grep '.ana.' | $P --jobs 10 \
-	'$s -xsl:Scripts/parlamint-tei2text.xsl element=seg {} > ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg/{/.}.txt'
+	@mkdir -p ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg.ana
+	@rm -f ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg.ana/ParlaMint-$<_*.seg.txt
+	@find -H ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX} -maxdepth 2 -type f -name "ParlaMint-$<_*.xml" | grep '.ana.' | $P --jobs 10 \
+	'$s -xsl:Scripts/parlamint-tei2text.xsl element=seg {} > ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg.ana/{/.}'
+	@find -H ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg.ana -type f | $P 'mv {} {.}.seg.txt'
+	@echo "INFO: annotated segments converted to text are stored in ${DATADIR}/ParlaMint-$<${CORPUSDIR_SUFFIX}/text.seg.ana"
+
+
 
 
 ######---------------