DwC Conversion Normal 0 / ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDTRANSNAMEYTRANSNAMESTATUSYSTATUSLINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSSTARTDATEYSTARTDATEENDDATEYENDDATELOGDATEYLOGDATEDEPDATEYDEPDATEREPLAYDATEYREPLAYDATELOG_FIELDYLOG_FIELD
ID_BATCHYID_BATCHSEQ_NRYSEQ_NRLOGDATEYLOGDATETRANSNAMEYTRANSNAMESTEPNAMEYSTEPNAMESTEP_COPYYSTEP_COPYLINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSINPUT_BUFFER_ROWSYINPUT_BUFFER_ROWSOUTPUT_BUFFER_ROWSYOUTPUT_BUFFER_ROWS
ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDLOG_DATEYLOG_DATELOGGING_OBJECT_TYPEYLOGGING_OBJECT_TYPEOBJECT_NAMEYOBJECT_NAMEOBJECT_COPYYOBJECT_COPYREPOSITORY_DIRECTORYYREPOSITORY_DIRECTORYFILENAMEYFILENAMEOBJECT_IDYOBJECT_IDOBJECT_REVISIONYOBJECT_REVISIONPARENT_CHANNEL_IDYPARENT_CHANNEL_IDROOT_CHANNEL_IDYROOT_CHANNEL_ID
ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDLOG_DATEYLOG_DATETRANSNAMEYTRANSNAMESTEPNAMEYSTEPNAMESTEP_COPYYSTEP_COPYLINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSLOG_FIELDNLOG_FIELD
0.0 0.0 10000 50 50 N Y 50000 Y N 1000 100 - 2011/08/04 14:42:31.000 admin 2012/05/31 14:16:42.000 Check catalog number uniquenessAbortY Check catalog number uniquenessMerge join type infoY Sort occurrencesCheck catalog number uniquenessY Write imagesReduce to one image per datasetY Write identificationsReduce to one identification per datasetY UnionSort preferred identificationsY Replace state/provGroup named areasY Replace state/provDenormalise named areasY Replace latin ranksDenormalise higher taxaY Rename higher taxon fieldsSort higher taxaY Remove empty imagesTrash canY Reduce meta for metaSort metaY Reduce images for metaSort imgDatasetTitleY Reduce ident for metaSort identDatasetTitleY Read type informationGroup type infoY Read occurrencesOccurrence transformationsY Read named areasNormalise named area classesY Read metadataReduce to one per datasetY Read imagesImages transformationsY Read identificationsIdentification transformationsY Read higher taxaLower ranksY Check preferred flagGet first identification recordY Check preferred flagFilter preferred identificationsY Occurrence transformationsSort occurrencesY Normalise named area classesReplace state/provY Normalise preferred flag vocabularySort identificationsY Lower ranksReplace latin ranksY Lower preferred flagNormalise preferred flag vocabularyY Images transformationsRemove empty imagesY Identification transformationsLower preferred flagY Group type infoSort type infoY Group named areasSort higher geographyY Get first identification recordUnionY Filter preferred identificationsWrite identificationsY Filter preferred identificationsUnionY Denormalise named areasSort named areasY Denormalise higher taxaConcatenate higher taxaY Create meta document.xmlWrite meta documentY Create eml documentWrite eml documentY Concatenate higher taxaRename higher taxon fieldsY Sort type infoMerge join type infoY Sort preferred identificationsMerge join identificationsY Sort named areasMerge join named areasY Sort 
metaMerge images for metaY Sort imgDatasetTitleMerge images for metaY Sort identificationsMerge join higher taxaY Sort identDatasetTitleMerge identifications for metaY Sort higher taxaMerge join higher taxaY Sort higher geographyMerge join higher geographyY Reduce to one per datasetReduce meta for metaY Reduce to one per datasetCreate eml documentY Reduce to one image per datasetReduce images for metaY Reduce to one identification per datasetReduce ident for metaY Merge images for metaMerge identifications for metaY Merge identifications for metaCreate meta document.xmlY Merge join higher taxaCheck preferred flagY Merge join higher geographyMerge join named areasY Merge join type infoMerge join higher geographyY Merge join identificationsWrite occurrencesY AbortWrite duplicate catalog number to fileY Get file namesGet parametersY Get parametersRead occurrencesY Get parametersRead type informationY Get parametersRead named areasY Get parametersRead imagesY Get parametersRead identificationsY Get parametersRead higher taxaY Get parametersRead metadataY Remove empty imagesWrite imagesY Merge join named areasMerge join identificationsY Create eml documentWrite dataset listY Abort Abort Y 1 none 0 Duplicate catalog number found. 
N 642 29 Y Check catalog number uniqueness Unique Y 1 none N Y duplicates catalogNumber N 535 74 Y Check preferred flag SwitchCase Y 1 none PreferredFlag N String Get first identification record 0 Filter preferred identifications 1 Filter preferred identifications 933 459 Y Concatenate higher taxa ScriptValueMod Y 1 none N 0 Script 1 var higherClassification = replace(kingdom + "; " + phylum + "; " + classis + "; " + order + "; " + family, "null; ", "", "; null", "") higherClassification higherClassification String -1 -1 N 580 546 Y Create eml document ScriptValueMod N 1 none N 0 Script 1 // make up EML filename from dataset title // (replace forbidden characters first) emlFileName = base_dir + "/tmp/" + replace(title, "\\)", "", "\\(", "", "/", "_", " ", "_", "'", "", '"', '') + " eml"; datasetTitleModified = replace(title, "\\)", "", "\\(", "", "/", "_", " ", "_", "'", "", '"', ''); if (isEmpty(organisationName)) { organisationName = (isEmpty(creator))? 'n/a':creator } contactAddressSnippet = (contactAddress == "")? 
'':' <address><deliveryPoint>' + escapeXml(contactAddress) + '</deliveryPoint></address>\n'; var eml = '\ <?xml version="1.0" encoding="utf-8"?>\n\ <eml:eml xmlns:eml="eml://ecoinformatics.org/eml-2.1.1"\n\ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n\ xmlns:dc="http://purl.org/dc/terms/"\n\ xsi:schemaLocation="eml://ecoinformatics.org/eml-2.1.1 http://rs.gbif.org/schema/eml-gbif-profile/1.0/eml.xsd"\n\ packageId="619a4b95-1a82-4006-be6a-7dbe3c9b33c5/v7" system="http://gbif.org" scope="system">\n\ <dataset>\n\ <title>' + escapeXml(title) + '</title>\n\ <creator>\n\ <organizationName>' + escapeXml(organisationName) + '</organizationName>\n\ </creator>\n\ <metadataProvider>\n\ <organizationName>' + escapeXml(contactName) + '</organizationName>\n' + contactAddressSnippet + '\ <phone>' + escapeXml(contactPhone) + '</phone>\n\ <electronicMailAddress>' + escapeXml(contactEmail) + '</electronicMailAddress>\n\ </metadataProvider>\n\ <pubDate>' + year(new Date()) + '</pubDate>\n\ <abstract>\n\ <para>' + escapeXml(details) + '</para>\n\ </abstract>\n\ <intellectualRights>\n\ <para>' + escapeXml(intellectualRights) + '</para>\n\ </intellectualRights>\n\ <contact>\n\ <individualName>\n\ <surName>' + escapeXml(contactName) + '</surName>\n\ </individualName>\n' + contactAddressSnippet + '\ <phone>' + escapeXml(contactPhone) + '</phone>\n\ <electronicMailAddress>' + escapeXml(contactEmail) + '</electronicMailAddress>\n\ </contact>\n\ </dataset>\n\ <additionalMetadata>\n\ <metadata>\n\ <gbif>\n\ <dateStamp>' + replace(date2str(new Date(), "yyyy-MM-dd HH:mm:ss"), " ", "T") + '</dateStamp>\n\ <hierarchyLevel>dataset</hierarchyLevel>\n\ <citation>' + escapeXml(citation) + '</citation>\n\ <resourceLogoUrl>' + escapeXml(resourceLogoUrl) + '</resourceLogoUrl>\n\ </gbif>\n\ </metadata>\n\ </additionalMetadata>\n\ </eml:eml>\ ' eml eml String -1 -1 N emlFileName emlFileName String -1 -1 N datasetTitleModified datasetTitleModified String -1 -1 N organisationName organisationName 
String -1 -1 Y contactAddressSnippet contactAddressSnippet String -1 -1 N 443 825 Y Create meta document.xml ScriptValueMod Y 1 none N 0 Script 1 // make up meta.xml filename from dataset title // (replace forbidden characters first) metaFileName = base_dir + "/tmp/" + replace(title, "\\)", "", "\\(", "", "/", "_", " ", "_", "'", "", '"', '') + " meta"; var metaCore = '\ <?xml version="1.0" encoding="UTF-8"?>\n\ <archive xmlns="http://rs.tdwg.org/dwc/text/">\n\ \n\ <core encoding="UTF-8" fieldsTerminatedBy="," linesTerminatedBy="\\n" fieldsEnclosedBy=\'"\' ignoreHeaderLines="1" \n\ rowType="http://rs.tdwg.org/dwc/terms/Occurrence">\n\ <files>\n\ <location>occurrence.txt</location>\n\ </files>\n\ <id index="0" />\n\ <!-- Occurrence fields -->\n\ <field index="0" term="http://rs.tdwg.org/dwc/terms/catalogNumber"/>\n\ <field index="1" term="http://rs.tdwg.org/dwc/terms/institutionCode"/>\n\ <field index="2" term="http://rs.tdwg.org/dwc/terms/collectionCode"/>\n\ <field index="3" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>\n\ <field index="4" term="http://rs.tdwg.org/dwc/terms/occurrenceID"/>\n\ <field index="5" term="http://rs.tdwg.org/dwc/terms/fieldNumber"/>\n\ <field index="6" term="http://purl.org/dc/terms/modified"/>\n\ <field index="7" term="http://rs.tdwg.org/dwc/terms/eventID"/>\n\ <field index="8" term="http://rs.tdwg.org/dwc/terms/samplingProtocol"/>\n\ <field index="9" term="http://rs.tdwg.org/dwc/terms/habitat"/>\n\ <field index="10" term="http://rs.tdwg.org/dwc/terms/eventRemarks"/>\n\ <field index="11" term="http://rs.tdwg.org/dwc/terms/minimumElevationInMeters"/>\n\ <field index="12" term="http://rs.tdwg.org/dwc/terms/maximumElevationInMeters"/>\n\ <field index="13" term="http://rs.tdwg.org/dwc/terms/minimumDepthInMeters"/>\n\ <field index="14" term="http://rs.tdwg.org/dwc/terms/maximumDepthInMeters"/>\n\ <field index="15" term="http://rs.tdwg.org/dwc/terms/country"/>\n\ <field index="16" term="http://rs.tdwg.org/dwc/terms/countryCode"/>\n\ 
<field index="17" term="http://rs.tdwg.org/dwc/terms/locality"/>\n\ <field index="18" term="http://rs.tdwg.org/dwc/terms/verbatimLocality"/>\n\ <field index="19" term="http://rs.tdwg.org/dwc/terms/eventDate"/>\n\ <field index="20" term="http://rs.tdwg.org/dwc/terms/verbatimEventDate"/>\n\ <field index="21" term="http://rs.tdwg.org/dwc/terms/eventTime"/>\n\ <field index="22" term="http://rs.tdwg.org/dwc/terms/startDayOfYear"/>\n\ <field index="23" term="http://rs.tdwg.org/dwc/terms/endDayOfYear"/>\n\ <field index="24" term="http://rs.tdwg.org/dwc/terms/occurrenceDetails"/>\n\ <field index="25" term="http://rs.tdwg.org/dwc/terms/occurrenceRemarks"/>\n\ <field index="26" term="http://rs.tdwg.org/dwc/terms/sex"/>\n\ <field index="27" term="http://rs.tdwg.org/dwc/terms/decimalLatitude"/>\n\ <field index="28" term="http://rs.tdwg.org/dwc/terms/decimalLongitude"/>\n\ <field index="29" term="http://rs.tdwg.org/dwc/terms/coordinateUncertaintyInMeters"/>\n\ <field index="30" term="http://rs.tdwg.org/dwc/terms/verbatimCoordinateSystem"/>\n\ <field index="31" term="http://rs.tdwg.org/dwc/terms/verbatimSRS"/>\n\ <field index="32" term="http://rs.tdwg.org/dwc/terms/typeStatus"/>\n\ <field index="33" term="http://rs.tdwg.org/dwc/terms/stateProvince"/>\n\ <field index="34" term="http://rs.tdwg.org/dwc/terms/county"/>\n\ <field index="35" term="http://rs.tdwg.org/dwc/terms/municipality"/>\n\ <field index="36" term="http://rs.tdwg.org/dwc/terms/continent"/>\n\ <field index="37" term="http://rs.tdwg.org/dwc/terms/waterBody"/>\n\ <field index="38" term="http://rs.tdwg.org/dwc/terms/islandGroup"/>\n\ <field index="39" term="http://rs.tdwg.org/dwc/terms/island"/>\n\ <field index="40" term="http://rs.tdwg.org/dwc/terms/higherGeography"/>\n\ <!-- Identification fields -->\n\ <field index="41" term="http://rs.tdwg.org/dwc/terms/dateIdentified"/>\n\ <field index="42" term="http://rs.tdwg.org/dwc/terms/identifiedBy"/>\n\ <field index="43" 
term="http://rs.tdwg.org/dwc/terms/nomenclaturalCode"/>\n\ <field index="44" term="http://rs.tdwg.org/dwc/terms/taxonRemarks"/>\n\ <field index="45" term="http://rs.tdwg.org/dwc/terms/identificationQualifier"/>\n\ <field index="46" term="http://rs.tdwg.org/dwc/terms/identificationRemarks"/>\n\ <field index="47" term="http://rs.tdwg.org/dwc/terms/identificationReferences"/>\n\ <field index="48" term="http://rs.tdwg.org/dwc/terms/scientificName"/>\n\ <field index="49" term="http://rs.tdwg.org/dwc/terms/scientificNameAuthorship"/>\n\ <field index="50" term="http://rs.tdwg.org/dwc/terms/higherClassification"/>\n\ <field index="51" term="http://rs.tdwg.org/dwc/terms/kingdom"/>\n\ <field index="52" term="http://rs.tdwg.org/dwc/terms/phylum"/>\n\ <field index="53" term="http://rs.tdwg.org/dwc/terms/class"/>\n\ <field index="54" term="http://rs.tdwg.org/dwc/terms/order"/>\n\ <field index="55" term="http://rs.tdwg.org/dwc/terms/family"/>\n\ <field index="56" term="http://rs.tdwg.org/dwc/terms/genus"/>\n\ <field index="57" term="http://rs.tdwg.org/dwc/terms/subgenus"/>\n\ <field index="58" term="http://rs.tdwg.org/dwc/terms/specificEpithet"/>\n\ <field index="59" term="http://rs.tdwg.org/dwc/terms/infraspecificEpithet"/>\n\ <field index="60" term="http://rs.tdwg.org/dwc/terms/taxonRank"/>\n\ </core>\n\ ' var metaClosingTag = '\n</archive>' if (imgDatasetTitle == null) { var metaImages = '' } else { var metaImages = '\ \n\ <extension encoding="UTF-8" fieldsTerminatedBy="," linesTerminatedBy="\\n" fieldsEnclosedBy=\'"\' ignoreHeaderLines="1"\n\ rowType="http://rs.gbif.org/terms/1.0/Image">\n\ <files>\n\ <location>image.txt</location>\n\ </files>\n\ <coreid index="0" />\n\ <field index="1" term="http://purl.org/dc/terms/identifier"/>\n\ <field index="2" term="http://purl.org/dc/terms/description"/>\n\ <field index="3" term="http://purl.org/dc/terms/format"/>\n\ <field index="4" term="http://purl.org/dc/terms/created"/>\n\ <field index="5" 
term="http://purl.org/dc/terms/creator"/>\n\ <field index="6" term="http://purl.org/dc/terms/license"/>\n\ <field index="7" term="http://purl.org/dc/terms/rightsHolder"/>\n\ </extension>\n\ '} if (identDatasetTitle == null) { var metaIdent = '' } else { var metaIdent = '\ \n\ <extension encoding="UTF-8" fieldsTerminatedBy="," linesTerminatedBy="\\n" fieldsEnclosedBy=\'"\' ignoreHeaderLines="1"\n\ rowType="http://rs.tdwg.org/dwc/terms/Identification">\n\ <files>\n\ <location>identification.txt</location>\n\ </files>\n\ <coreid index="0" />\n\ <field index="1" term="http://rs.tdwg.org/dwc/terms/dateIdentified"/>\n\ <field index="2" term="http://rs.tdwg.org/dwc/terms/identifiedBy"/>\n\ <field index="3" term="http://rs.tdwg.org/dwc/terms/nomenclaturalCode"/>\n\ <field index="4" term="http://rs.tdwg.org/dwc/terms/taxonRemarks"/>\n\ <field index="5" term="http://rs.tdwg.org/dwc/terms/identificationQualifier"/>\n\ <field index="6" term="http://rs.tdwg.org/dwc/terms/identificationRemarks"/>\n\ <field index="7" term="http://rs.tdwg.org/dwc/terms/identificationReferences"/>\n\ <field index="8" term="http://rs.tdwg.org/dwc/terms/scientificName"/>\n\ <field index="9" term="http://rs.tdwg.org/dwc/terms/scientificNameAuthorship"/>\n\ <field index="10" term="http://rs.tdwg.org/dwc/terms/higherClassification"/>\n\ <field index="11" term="http://rs.tdwg.org/dwc/terms/kingdom"/>\n\ <field index="12" term="http://rs.tdwg.org/dwc/terms/phylum"/>\n\ <field index="13" term="http://rs.tdwg.org/dwc/terms/class"/>\n\ <field index="14" term="http://rs.tdwg.org/dwc/terms/order"/>\n\ <field index="15" term="http://rs.tdwg.org/dwc/terms/family"/>\n\ <field index="16" term="http://rs.tdwg.org/dwc/terms/genus"/>\n\ <field index="17" term="http://rs.tdwg.org/dwc/terms/subgenus"/>\n\ <field index="18" term="http://rs.tdwg.org/dwc/terms/specificEpithet"/>\n\ <field index="19" term="http://rs.tdwg.org/dwc/terms/infraspecificEpithet"/>\n\ <field index="20" 
term="http://rs.tdwg.org/dwc/terms/taxonRank"/>\n\ </extension>\n\ '} metaFileName metaFileName String -1 -1 N metaCore metaCore String -1 -1 N metaClosingTag metaClosingTag String -1 -1 N metaImages metaImages String -1 -1 N metaIdent metaIdent String -1 -1 N 952 739 Y Denormalise higher taxa Denormaliser Y 1 none HigherTaxonRank catalogNumber ScientificNameTree HigherTaxonName kingdom kingdom String -1 -1 CONCAT_COMMA HigherTaxonName phylum phylum String -1 -1 CONCAT_COMMA HigherTaxonName class classis String -1 -1 CONCAT_COMMA HigherTaxonName order order String -1 -1 CONCAT_COMMA HigherTaxonName family family String -1 -1 CONCAT_COMMA 476 545 Y Denormalise named areas Denormaliser Y 1 none AreaClass catalogNumber AreaName stateprovince stateProvince String -1 -1 CONCAT_COMMA AreaName county county String -1 -1 CONCAT_COMMA AreaName municipality municipality String -1 -1 CONCAT_COMMA AreaName continent continent String -1 -1 CONCAT_COMMA AreaName waterbody waterBody String -1 -1 CONCAT_COMMA AreaName islandgroup islandGroup String -1 -1 CONCAT_COMMA AreaName island island String -1 -1 CONCAT_COMMA 583 274 Y Filter preferred identifications FilterRows Y 1 none Union Write identifications N PreferredFlag = constantString1-1-1N 1027 502 Y Get file names FilesFromResult N 1 none 28 63 Y Get first identification record UniqueRowsByHashSet Y 1 none N N catalogNumber 1030 410 Y Get parameters GetVariable N 1 none sort_size ${sort_size} - 0 0 none base_dir ${base_dir} - 0 0 none 21 286 Y Group named areas MemoryGroupBy Y 1 none N catalogNumber higherGeography AreaName CONCAT_STRING ; 583 176 Y Group type info MemoryGroupBy Y 1 none N catalogNumber typeStatus typeStatus CONCAT_STRING ; 294 143 Y Identification transformations ScriptValueMod Y 1 none N 0 Script 1 // replace empty values with different concepts if (isEmpty(dateIdentified)) { dateIdentified = DateText; } if (isEmpty(identifiedBy)) { if (IdentifierFullName == '') { IdentifierFullName = 
trim(trim(IdentifierPrefix + " " + IdentifierGivenName) + " " + trim(IdentifierInheritedName + " " + IdentifierSuffix)); } identifiedBy = IdentifierFullName } if (isEmpty(scientificNameAuthorship)) { scientificNameAuthorship = (isEmpty(ZoologicalAuthorTeamOriginalAndYear))? BacterialAuthorTeamAndYear:ZoologicalAuthorTeamOriginalAndYear } if (isEmpty(genus)) { genus = (!isEmpty(ZoologicalGenusOrMonomial))? ZoologicalGenusOrMonomial: (!isEmpty(BacterialGenusOrMonomial))? BacterialGenusOrMonomial:ViralGenusOrMonomial } if (isEmpty(subgenus)) { subgenus = BacterialSubgenus } if (isEmpty(specificEpithet)) { specificEpithet = (!isEmpty(ZoologicalSpeciesEpithet))? ZoologicalSpeciesEpithet:BacterialSpeciesEpithet } if (isEmpty(infraspecificEpithet)) { infraspecificEpithet = (!isEmpty(ZoologicalSubspeciesEpithet))? ZoologicalSubspeciesEpithet:BacterialSubspeciesEpithet } // make up identification filename from dataset title identFileName = base_dir + "/tmp/" + replace(DatasetTitle, "\\)", "", "\\(", "", "/", "_", " ", "_", "'", "", '"', '') + " identification"; identFileName identFileName String -1 -1 N dateIdentified dateIdentified String -1 -1 Y identifiedBy identifiedBy String -1 -1 Y IdentifierFullName IdentifierFullName String -1 -1 Y scientificNameAuthorship scientificNameAuthorship String -1 -1 Y genus genus String -1 -1 Y subgenus subgenus String -1 -1 Y specificEpithet specificEpithet String -1 -1 Y infraspecificEpithet infraspecificEpithet String -1 -1 Y 307 463 Y Images transformations ScriptValueMod Y 1 none N 0 Script 1 // replace empty value if (identifier == '') { var identifier = ProductURI; } // make up images filename from dataset title // (replace forbidden characters first) imgFileName = base_dir + "/tmp/" + replace(DatasetTitle, "\\)", "", "\\(", "", "/", "_", " ", "_", "'", "", '"', '') + " image"; imgFileName imgFileName String -1 -1 N identifier identifier String -1 -1 Y 296 352 Y Lower preferred flag StringOperations Y 1 none PreferredFlag none lower 
none no none none none 438 464 Y Lower ranks StringOperations Y 1 none HigherTaxonRank none lower none no none none none 274 543 Y Merge identifications for meta MergeJoin Y 1 none LEFT OUTER Merge images for meta Sort identDatasetTitle title identDatasetTitle 796 736 Y Merge images for meta MergeJoin Y 1 none LEFT OUTER Sort meta Sort imgDatasetTitle title imgDatasetTitle 653 736 Y Merge join higher geography MergeJoin Y 1 none LEFT OUTER Merge join type info Sort higher geography catalogNumber catalogNumber 783 99 Y Merge join higher taxa MergeJoin N 1 none LEFT OUTER Sort identifications Sort higher taxa catalogNumber ScientificNameTree catalogNumber ScientificNameTree 823 461 Y Merge join identifications MergeJoin Y 1 none LEFT OUTER Merge join named areas Sort preferred identifications catalogNumber catalogNumber 1129 101 Y Merge join named areas MergeJoin Y 1 none LEFT OUTER Merge join higher geography Sort named areas catalogNumber catalogNumber 938 99 Y Merge join type info MergeJoin Y 1 none LEFT OUTER Check catalog number uniqueness Sort type info catalogNumber catalogNumber 635 98 Y Normalise named area classes StringOperations Y 1 none AreaClass both lower none no none none none 288 247 Y Normalise preferred flag vocabulary ValueMapper Y 1 none PreferredFlag true 1 yes 1 y 1 false 0 no 0 n 0 544 464 Y Occurrence transformations ScriptValueMod Y 1 none N 0 Script 1 // if coordinates are present, set verbatimCoordinateSystem if (isEmpty(decimalLongitude) && isEmpty(decimalLatitude)) { verbatimCoordinateSystem = ""; } else { verbatimCoordinateSystem = "decimal degrees"; } // make up occurrence filename from dataset title occFileName = base_dir + "/tmp/" + replace(DatasetTitle, "\\)", "", "\\(", "", "/", "_", " ", "_", "'", "", '"', '') + " occurrence"; errFileName = base_dir + "/duplicate" occFileName occFileName String -1 -1 N verbatimCoordinateSystem verbatimCoordinateSystem String -1 -1 N errFileName errFileName String -1 -1 N 289 55 Y Read higher taxa 
getXMLData Y 1 none N N N N N N N N N Y UTF-8 C:\Program Files\pentaho\design-tools\data-integration\abcd.xml N N HigherTaxonName abcd:HigherTaxonName node String -1 -1 none N HigherTaxonRank abcd:HigherTaxonRank node String -1 -1 none N catalogNumber ../../../../../../abcd:UnitID node String -1 -1 none N ScientificNameTree ../../abcd:ScientificName node String -1 -1 both N 0 /abcd:DataSets/abcd:DataSet/abcd:Units/abcd:Unit/abcd:Identifications/abcd:Identification/abcd:Result/abcd:TaxonIdentified/abcd:HigherTaxa/abcd:HigherTaxon Y Y path 160 543 Y Read identifications getXMLData Y 1 none N N N N N N N N N Y UTF-8 C:\Program Files\pentaho\design-tools\data-integration\abcd.xml N N catalogNumber ../../abcd:UnitID node String -1 -1 none N ScientificNameTree abcd:Result/abcd:TaxonIdentified/abcd:ScientificName node String -1 -1 both N dateIdentified abcd:Date/abcd:ISODateTimeBegin node String -1 -1 none N DateText abcd:Date/abcd:DateText node String -1 -1 none N identifiedBy abcd:Identifiers/abcd:IdentifiersText node String -1 -1 none N IdentifierFullName abcd:Identifiers/abcd:Identifier/abcd:PersonName/abcd:FullName node String -1 -1 none N IdentifierInheritedName abcd:Identifiers/abcd:Identifier/abcd:PersonName/abcd:AtomisedName/abcd:InheritedName node String -1 -1 none N IdentifierGivenName abcd:Identifiers/abcd:Identifier/abcd:PersonName/abcd:AtomisedName/abcd:GivenName node String -1 -1 none N IdentifierPrefix abcd:Identifiers/abcd:Identifier/abcd:PersonName/abcd:AtomisedName/abcd:Prefix node String -1 -1 none N IdentifierSuffix abcd:Identifiers/abcd:Identifier/abcd:PersonName/abcd:AtomisedName/abcd:Suffix node String -1 -1 none N nomenclaturalCode abcd:Result/abcd:TaxonIdentified/abcd:Code node String -1 -1 none N taxonRemarks abcd:Result/abcd:TaxonIdentified/abcd:NameComments node String -1 -1 none N identificationQualifier abcd:Result/abcd:TaxonIdentified/abcd:IdentificationQualifier node String -1 -1 none N identificationRemarks abcd:Notes node String -1 -1 
none N identificationReferences abcd:References/abcd:Reference/abcd:TitleCitation node String -1 -1 none N scientificName abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:FullScientificNameString node String -1 -1 none N scientificNameAuthorship abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Botanical/abcd:AuthorTeam node String -1 -1 none N ZoologicalAuthorTeamOriginalAndYear abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Zoological/abcd:AuthorTeamOriginalAndYear node String -1 -1 none N BacterialAuthorTeamAndYear abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Bacterial/abcd:AuthorTeamAndYear node String -1 -1 none N genus abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Botanical/abcd:GenusOrMonomial node String -1 -1 none N ZoologicalGenusOrMonomial abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Zoological/abcd:GenusOrMonomial node String -1 -1 none N BacterialGenusOrMonomial abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Bacterial/abcd:GenusOrMonomial node String -1 -1 none N ViralGenusOrMonomial abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Viral/abcd:GenusOrMonomial node String -1 -1 none N subgenus abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Zoological/abcd:Subgenus node String -1 -1 none N BacterialSubgenus abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Bacterial/abcd:Subgenus node String -1 -1 none N specificEpithet abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Botanical/abcd:FirstEpithet node String -1 -1 none N ZoologicalSpeciesEpithet abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Zoological/abcd:SpeciesEpithet node String -1 -1 none N BacterialSpeciesEpithet 
abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Bacterial/abcd:SpeciesEpithet node String -1 -1 none N infraspecificEpithet abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Botanical/abcd:InfraspecificEpithet node String -1 -1 none N ZoologicalSubspeciesEpithet abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Zoological/abcd:SubspeciesEpithet node String -1 -1 none N BacterialSubspeciesEpithet abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Bacterial/abcd:SubspeciesEpithet node String -1 -1 none N taxonRank abcd:Result/abcd:TaxonIdentified/abcd:ScientificName/abcd:NameAtomised/abcd:Botanical/abcd:Rank node String -1 -1 none N PreferredFlag abcd:PreferredFlag node String -1 -1 none N DatasetTitle ../../../../abcd:Metadata/abcd:Description/abcd:Representation/abcd:Title node String -1 -1 none N 0 /abcd:DataSets/abcd:DataSet/abcd:Units/abcd:Unit/abcd:Identifications/abcd:Identification Y Y path 165 462 Y Read images getXMLData Y 1 none N N N N N N N N N Y UTF-8 C:\Program Files\pentaho\design-tools\data-integration\abcd.xml N N identifier abcd:FileURI node String -1 -1 none N ProductURI abcd:ProductURI node String -1 -1 none N description abcd:Comment node String -1 -1 none N format abcd:Format node String -1 -1 none N created abcd:CreatedDate node String -1 -1 none N creator abcd:Creator node String -1 -1 none N license abcd:IPR/abcd:Licenses/abcd:License/abcd:Text node String -1 -1 none N rightsHolder abcd:IPR/abcd:IPRDeclarations/abcd:IPRDeclaration/abcd:Text node String -1 -1 none N catalogNumber ../../abcd:UnitID node String -1 -1 none N DatasetTitle ../../../../abcd:Metadata/abcd:Description/abcd:Representation/abcd:Title node String -1 -1 none N 0 /abcd:DataSets/abcd:DataSet/abcd:Units/abcd:Unit/abcd:MultiMediaObjects/abcd:MultiMediaObject Y Y path 161 349 Y Read metadata getXMLData Y 1 none N N N N N N N N N Y UTF-8 C:\Program 
Files\pentaho\design-tools\data-integration\abcd.xml N N title abcd:Metadata/abcd:Description/abcd:Representation/abcd:Title node String -1 -1 none N details abcd:Metadata/abcd:Description/abcd:Representation/abcd:Details node String -1 -1 none N creator abcd:Metadata/abcd:RevisionData/abcd:Creators node String -1 -1 none N intellectualRights abcd:Metadata/abcd:IPRStatements/abcd:IPRDeclarations/abcd:IPRDeclaration/abcd:Text node String -1 -1 none N citation abcd:Metadata/abcd:IPRStatements/abcd:Citations/abcd:Citation/abcd:Text node String -1 -1 none N resourceLogoUrl abcd:Metadata/abcd:IconURI node String -1 -1 none N contactName abcd:ContentContacts/abcd:ContentContact/abcd:Name node String -1 -1 none N contactAddress abcd:ContentContacts/abcd:ContentContact/abcd:Address node String -1 -1 none N contactPhone abcd:ContentContacts/abcd:ContentContact/abcd:Phone node String -1 -1 none N contactEmail abcd:ContentContacts/abcd:ContentContact/abcd:Email node String -1 -1 none N organisationName abcd:Metadata/abcd:Owners/abcd:Owner/abcd:Organisation/abcd:Name/abcd:Representation/abcd:Text node String -1 -1 none N 0 /abcd:DataSets/abcd:DataSet Y Y path 167 734 Y Read named areas getXMLData Y 1 none N N N N N N N N N Y rowNum UTF-8 C:\Program Files\pentaho\design-tools\data-integration\abcd.xml N N AreaClass abcd:AreaClass node String -1 -1 none N AreaName abcd:AreaName node String -1 -1 none N catalogNumber ../../../abcd:UnitID node String -1 -1 none N 0 /abcd:DataSets/abcd:DataSet/abcd:Units/abcd:Unit/abcd:Gathering/abcd:NamedAreas/abcd:NamedArea Y Y path 156 246 Y Read occurrences getXMLData Y 1 none N N N N N N N N N Y UTF-8 C:\Program Files\pentaho\design-tools\data-integration\abcd.xml N N institutionCode abcd:SourceInstitutionID node String -1 -1 none N collectionCode abcd:SourceID node String -1 -1 none N catalogNumber abcd:UnitID node String -1 -1 none N basisOfRecord abcd:RecordBasis node String -1 -1 none N occurrenceID abcd:UnitGUID node String -1 -1 none N 
fieldNumber abcd:CollectorsFieldNumber node String -1 -1 none N modified abcd:DateLastEdited node String -1 -1 none N minimumElevationInMeters abcd:Gathering/abcd:Altitude/abcd:MeasurementOrFactAtomised/abcd:LowerValue node String -1 -1 none N maximumElevationInMeters abcd:Gathering/abcd:Altitude/abcd:MeasurementOrFactAtomised/abcd:UpperValue node String -1 -1 none N minimumDepthInMeters abcd:Gathering/abcd:Depth/abcd:MeasurementOrFactAtomised/abcd:LowerValue node String -1 -1 none N maximumDepthInMeters abcd:Gathering/abcd:Depth/abcd:MeasurementOrFactAtomised/abcd:UpperValue node String -1 -1 none N country abcd:Gathering/abcd:Country/abcd:Name node String -1 -1 none N countryCode abcd:Gathering/abcd:Country/abcd:ISO3166Code node String -1 -1 none N locality abcd:Gathering/abcd:LocalityText node String -1 -1 none N verbatimLocality abcd:Gathering/abcd:AreaDetail node String -1 -1 none N eventDate abcd:Gathering/abcd:DateTime/abcd:ISODateTimeBegin node String -1 -1 none N verbatimEventDate abcd:Gathering/abcd:DateTime/abcd:DateText node String -1 -1 none N eventTime abcd:Gathering/abcd:DateTime/abcd:TimeOfDayBegin node String -1 -1 none N startDayOfYear abcd:Gathering/abcd:DateTime/abcd:DayNumberBegin node String -1 -1 none N endDayOfYear abcd:Gathering/abcd:DateTime/abcd:DayNumberEnd node String -1 -1 none N decimalLongitude abcd:Gathering/abcd:SiteCoordinateSets/abcd:SiteCoordinates/abcd:CoordinatesLatLong/abcd:LongitudeDecimal node String -1 -1 none N decimalLatitude abcd:Gathering/abcd:SiteCoordinateSets/abcd:SiteCoordinates/abcd:CoordinatesLatLong/abcd:LatitudeDecimal node String -1 -1 none N coordinateUncertaintyInMeters abcd:Gathering/abcd:SiteCoordinateSets/abcd:SiteCoordinates/abcd:CoordinatesLatLong/abcd:CoordinateErrorDistanceInMeters node String -1 -1 none N verbatimSRS abcd:Gathering/abcd:SiteCoordinateSets/abcd:SiteCoordinates/abcd:CoordinatesLatLong/abcd:SpatialDatum node String -1 -1 none N eventID abcd:Gathering/abcd:Code node String -1 -1 none N 
samplingProtocol abcd:Gathering/abcd:Method node String -1 -1 none N habitat abcd:Gathering/abcd:Biotope/abcd:Text node String -1 -1 none N eventRemarks abcd:Gathering/abcd:Notes node String -1 -1 none N occurrenceRemarks abcd:Notes node String -1 -1 none N occurrenceDetails abcd:RecordURI node String -1 -1 none N sex abcd:Sex node String -1 -1 none N DatasetTitle ../../abcd:Metadata/abcd:Description/abcd:Representation/abcd:Title node String -1 -1 none N 0 /abcd:DataSets/abcd:DataSet/abcd:Units/abcd:Unit Y Y path 158 55 Y Read type information getXMLData Y 1 none N N N N N N N N N Y rowNum UTF-8 C:\Program Files\pentaho\design-tools\data-integration\abcd.xml N N N N typeStatus abcd:TypeStatus node String -1 -1 none N catalogNumber ../../../abcd:UnitID node String -1 -1 none N 0 /abcd:DataSets/abcd:DataSet/abcd:Units/abcd:Unit/abcd:SpecimenUnit/abcd:NomenclaturalTypeDesignations/abcd:NomenclaturalTypeDesignation Y Y path 156 143 Y Reduce ident for meta SelectValues Y 1 none DatasetTitle identDatasetTitle -2 -2 N 829 668 Y Reduce images for meta SelectValues Y 1 none DatasetTitle imgDatasetTitle -2 -2 N 591 611 Y Reduce meta for meta SelectValues Y 1 none title -2 -2 base_dir -2 -2 N 440 736 Y Reduce to one identification per dataset UniqueRowsByHashSet N 1 none N N DatasetTitle 1030 668 Y Reduce to one image per dataset UniqueRowsByHashSet N 1 none N N DatasetTitle 897 610 Y Reduce to one per dataset UniqueRowsByHashSet N 1 none N N title 297 735 Y Remove empty images FilterRows Y 1 none Write images Trash can N identifier IS NOT NULL 437 352 Y Rename higher taxon fields SelectValues Y 1 none N classis class - -2 -2 false 700 546 Y Replace latin ranks ValueMapper Y 1 none HigherTaxonRank regnum kingdom subregnum subkingdom superclassis superclass classis class subclassis subclass superordo superorder ordo order subordo suborder superfamilia superfamily familia family subfamilia subfamily tribus tribe 371 544 Y Replace state/prov ValueMapper N 1 none AreaClass state 
stateprovince province stateprovince 438 247 Y Sort higher geography SortRows Y 1 none %%java.io.tmpdir%% out ${sort_size} N N catalogNumber Y Y 717 175 Y Sort higher taxa SortRows N 1 none %%java.io.tmpdir%% out ${sort_size} N N catalogNumber Y Y ScientificNameTree Y Y 789 547 Y Sort identDatasetTitle SortRows N 1 none %%java.io.tmpdir%% out ${sort_size} N N identDatasetTitle Y Y 679 668 Y Sort identifications SortRows Y 1 none %%java.io.tmpdir%% out ${sort_size} N N catalogNumber Y Y ScientificNameTree Y Y 688 464 Y Sort imgDatasetTitle SortRows N 1 none %%java.io.tmpdir%% out ${sort_size} N N imgDatasetTitle Y Y 418 612 Y Sort meta SortRows N 1 none %%java.io.tmpdir%% out ${sort_size} N N title Y Y 538 736 Y Sort named areas SortRows Y 1 none %%java.io.tmpdir%% out ${sort_size} N N catalogNumber Y Y 746 277 Y Sort occurrences SortRows Y 1 none %%java.io.tmpdir%% out ${sort_size} N N catalogNumber Y Y 425 54 Y Sort preferred identifications SortRows Y 1 none %%java.io.tmpdir%% out ${sort_size} N N catalogNumber Y Y 1126 234 Y Sort type info SortRows Y 1 none %%java.io.tmpdir%% out ${sort_size} N N catalogNumber Y Y 428 143 Y Trash can Dummy Y 1 none 565 387 Y Union Dummy Y 1 none 1126 456 Y Write dataset list TextFileOutput Y 1 none ; " N N
N
N
DOS None UTF-8 N ${base_dir}/tmp/datasets N Y list N N N N N N Y N N 0 datasetTitleModified String none -1 -1 575 868 Y
Write duplicate catalog number to file TextFileOutput Y 1 none ; " N N
N
N
DOS None Y errFileName output\duplicate N Y txt N N N N N N Y N N 0 catalogNumber String none -1 -1 800 27 Y
Write eml document TextFileOutput Y 1 none ; N N
N
N
DOS None UTF-8 Y emlFileName file N Y xml N N N N N N Y N Y 0 eml String none -1 -1 1149 829 Y
Write identifications TextFileOutput Y 1 none , " Y N
Y
N
DOS None UTF-8 Y identFileName C:\Users\j.holetschek\Desktop\pentaho\occurrence N Y txt N N N N N N Y N Y 0 catalogNumber String none -1 -1 dateIdentified String none -1 -1 identifiedBy String none -1 -1 nomenclaturalCode String none -1 -1 taxonRemarks String none -1 -1 identificationQualifier String none -1 -1 identificationRemarks String none -1 -1 identificationReferences String none -1 -1 scientificName String none -1 -1 scientificNameAuthorship String none -1 -1 higherClassification String none -1 -1 kingdom String none -1 -1 phylum String none -1 -1 class String none -1 -1 order String none -1 -1 family String none -1 -1 genus String none -1 -1 subgenus String none -1 -1 specificEpithet String none -1 -1 infraspecificEpithet String none -1 -1 taxonRank String none -1 -1 1149 550 Y
Write images TextFileOutput Y 1 none , " Y N
Y
N
DOS None UTF-8 Y imgFileName C:\Users\j.holetschek\Desktop\pentaho\occurrence N Y txt N N N N N N Y N Y 0 catalogNumber String none -1 -1 identifier String none -1 -1 description String none -1 -1 format String none -1 -1 created String none -1 -1 creator String none -1 -1 license String none -1 -1 rightsHolder String none -1 -1 896 354 Y
Write meta document TextFileOutput Y 1 none N N
N
N
DOS None UTF-8 Y metaFileName file N Y xml N N N N N N Y N Y 0 metaCore String none -1 -1 metaImages String none -1 -1 metaIdent String none -1 -1 metaClosingTag String none -1 -1 1151 739 Y
Write occurrences TextFileOutput Y 1 none , " Y N
Y
N
DOS None UTF-8 Y occFileName C:\Users\j.holetschek\Desktop\pentaho\occurrence N Y txt N N N N N N Y N Y 0 catalogNumber String none -1 -1 institutionCode String none -1 -1 collectionCode String none -1 -1 basisOfRecord String none -1 -1 occurrenceID String none -1 -1 fieldNumber String none -1 -1 modified String none -1 -1 eventID String none -1 -1 samplingProtocol String none -1 -1 habitat String none -1 -1 eventRemarks String none -1 -1 minimumElevationInMeters String none -1 -1 maximumElevationInMeters String none -1 -1 minimumDepthInMeters String none -1 -1 maximumDepthInMeters String none -1 -1 country String none -1 -1 countryCode String none -1 -1 locality String none -1 -1 verbatimLocality String none -1 -1 eventDate String none -1 -1 verbatimEventDate String none -1 -1 eventTime String none -1 -1 startDayOfYear String none -1 -1 endDayOfYear String none -1 -1 occurrenceDetails String none -1 -1 occurrenceRemarks String none -1 -1 sex String none -1 -1 decimalLatitude String none -1 -1 decimalLongitude String none -1 -1 coordinateUncertaintyInMeters String none -1 -1 verbatimCoordinateSystem String none -1 -1 verbatimSRS String none -1 -1 typeStatus String none -1 -1 stateProvince String none -1 -1 county String none -1 -1 municipality String none -1 -1 continent String none -1 -1 waterBody String none -1 -1 islandGroup String none -1 -1 island String none -1 -1 higherGeography String none -1 -1 dateIdentified String none -1 -1 identifiedBy String none -1 -1 nomenclaturalCode String none -1 -1 taxonRemarks String none -1 -1 identificationQualifier String none -1 -1 identificationRemarks String none -1 -1 identificationReferences String none -1 -1 scientificName String none -1 -1 scientificNameAuthorship String none -1 -1 higherClassification String none -1 -1 kingdom String none -1 -1 phylum String none -1 -1 class String none -1 -1 order String none -1 -1 family String none -1 -1 genus String none -1 -1 subgenus String none -1 -1 specificEpithet String 
none -1 -1 infraspecificEpithet String none -1 -1 taxonRank String none -1 -1 1140 15 Y
Check catalog number uniqueness Abort Y N