CNF Database

Useful Queries

psql -h myconfluencedb -U atlconfluence -W -d confluence

\pset format wrapped

Spaces

SELECT spaceid,

       spacename

  FROM spaces

 ORDER BY spacename;

  spaceid  |                       spacename-----------+-------------------------------------------------------------    999999 | Database  50758633 | UNIX   6718445 | Network  29688809 | Windows

Pages

\prompt 'spaceid> ' space

SELECT c.contentid,

       c.contenttype, 

       c.title

  FROM content c

 WHERE c.spaceid = :space

   AND c.contenttype = 'PAGE'

   AND c.content_status = 'current'

 ORDER BY c.title;

SELECT COUNT(c.contentid) AS "number of pages",

       s.spaceid,

       s.spacename

  FROM content c

  JOIN spaces s ON c.spaceid = s.spaceid

 WHERE c.spaceid IS NOT NULL

   AND c.prevver IS NULL

   AND c.contenttype = 'PAGE'

   AND c.content_status = 'current'

 GROUP BY s.spacename,

          s.spaceid

 ORDER BY "number of pages" DESC;

 contentid | contenttype |                     title-----------+-------------+------------------------------------------------  82182173 | PAGE        | Add Article in Replication using SSMS  14549158 | PAGE        | Ad-Hoc Checks - MS-SQL  87785672 | PAGE        | AlwaysON DB Not Synchronizing State   7864670 | PAGE        | Checks  21495895 | PAGE        | Cloud  77299728 | PAGE        | Configuring SQL Always On AG 154861610 | PAGE        | Database Backup  88506505 | PAGE        | Deinstall Oracle OEM agent
 number of pages |                      spacename-----------------+--------------------------------------------------------             877 | IT Knowledge Base             513 | Operations             242 | Database             171 | UNIX

SELECT c.title,

       u.username AS Creator,

       c.creationdate,

       c.lastmoddate,

       um.username AS LastModifier

  FROM content c

  JOIN user_mapping u  ON c.creator      = u.user_key

  JOIN user_mapping um ON c.lastmodifier = um.user_key

 WHERE c.prevver IS NULL

   AND c.contenttype = 'PAGE'

   AND c.content_status = 'current'

 ORDER BY c.title;

SELECT s.spacekey,

       c.title, 

       u.username AS Creator, 

       c.creationdate,

       um.username AS LastModifier,

       c.lastmoddate

  FROM content c

  JOIN spaces s        ON c.spaceid      = s.spaceid

  JOIN user_mapping u  ON c.creator      = u.user_key 

  JOIN user_mapping um ON c.lastmodifier = um.user_key

 WHERE c.prevver IS NULL 

   AND c.contenttype = 'PAGE' 

   AND c.content_status = 'current'

   AND s.spacekey IN ('MYSPACE1','MYSPACE2','etc')

 ORDER BY s.spacekey,

          c.title;

                           title                           |                  creator                   |      creationdate       |       lastmoddate       |                lastmodifier-----------------------------------------------------------+--------------------------------------------+-------------------------+-------------------------+-------------------------------------------- 010822 MyPage1                                            | myuser                                     | 2022-08-02 07:08:20.575 | 2022-08-05 14:36:33.897 | myuser                                      020522 MyPage2                                            | myuser                                     | 2022-05-03 13:28:10.946 | 2022-05-09 15:37:32.275 | myuser                                      030123 MyPage3                                            | myuser                                     | 2023-01-03 08:17:53.933 | 2023-01-06 15:01:00.149 | myuser                                     

SELECT COUNT(c.version) AS "number of versions",

       c.title,

       s.spacename

  FROM content c 

  LEFT JOIN spaces s ON c.spaceid= s.spaceid

 WHERE c.contenttype = 'PAGE'

   AND s.spacename = 'Database'

 GROUP BY s.spacename,

          c.title

 ORDER BY c.title ASC;

 number of versions |                                      title                                      |           spacename--------------------+---------------------------------------------------------------------------------+--------------------------------                 12 |                                                                                 | Database                   2 | Add Article in Replication using SSMS                                           | Database                   2 | Ad-Hoc Checks - MS-SQL                                                          | Database                  1 | AlwaysON DB Not Synchronizing State                                             | Database                  2 | Checks                                                                          | Database                  2 | Cloud                                                                           | Database                  2 | Configuring SQL Always On AG                                                    | Database                  2 | Database Backup                                                                 | Database                  2 | Deinstall Oracle OEM agent                                                      | Database

SELECT c.contentid,

       c.pageid,

       c.parentid,

       c.title,

       s.spaceid,

       s.spacename

  FROM content c 

  LEFT JOIN spaces s ON c.spaceid= s.spaceid

 WHERE c.contenttype = 'PAGE'

   AND c.content_status = 'current'

 ORDER BY s.spacename, c.title ASC;

Page Content

SELECT body

  FROM bodycontent

 WHERE contentid = 999999;

SELECT bc.body

  FROM bodycontent bc

  JOIN content c ON bc.contentid = c.contentid

 WHERE c.title = 'page_title'

   AND c.prevver IS NULL \g filename.htm

SELECT bc.body

  FROM bodycontent bc

  JOIN content c ON bc.contentid = c.contentid

 WHERE c.contenttype = 'PAGE'

   AND c.content_status = 'current'

   AND c.pageid = 99999999;

The example below enables you to extract HTML files from the database. The directory structure works with the attachment extraction detailed later on this page.

vi /mnt/efs/script1.sql

SELECT c.contentid||' xx '||c.contentid

  FROM content c

 WHERE c.spaceid = :space

   AND c.contenttype = 'PAGE'

   AND c.content_status = 'current' \g /mnt/efs/temp.sh

vi /mnt/efs/script2.sql

SELECT c.contentid,

       c.contenttype, 

       c.title

  FROM content c

 WHERE c.spaceid = :space

   AND c.contenttype = 'PAGE'

   AND c.content_status = 'current'

 ORDER BY c.title \g readme.txt

vi /mnt/efs/script.sh

export RDSHOST=RDSEndpoint

read -p "Enter spaceid: " myvar

export SPACEID=${myvar}

cd /mnt/efs/content/${SPACEID}

pwd

psql -h $RDSHOST -U atlconfluence -d confluence -t -v space=${SPACEID} -f /mnt/efs/script1.sql

cat /mnt/efs/temp.sh | awk 'NF' | sed 's/^/SELECT body FROM bodycontent WHERE contentid =/' | sed 's/xx/\\g/' | sed 's/$/.htm/' >/mnt/efs/temp.sql

psql -h $RDSHOST -U atlconfluence -d confluence -t -f /mnt/efs/temp.sql

psql -h $RDSHOST -U atlconfluence -d confluence -t -v space=${SPACEID} -f /mnt/efs/script2/sql

ls /mnt/efs/content/${SPACEID}

sh /mnt/efs/script.sh

Script should create an html file for each page and a readme.txt mapping the html file to a page title.

Attachments

Size...

SELECT c.title AS Attachment_Name, 

       cp.longval AS Attachment_Size, 

       s.spacename,

       c2.title AS Page_Title,

       c.pageid AS Location

  FROM content c

  JOIN content c2 ON c.pageid= c2.contentid

  JOIN contentproperties cp ON c.contentid = cp.contentid

  JOIN spaces s on c2.spaceid = s.spaceid

 WHERE c.contenttype = 'ATTACHMENT'

   AND cp.propertyname = 'FILESIZE'

 ORDER BY cp.longval DESC;

To extract all attachments to their original filenames...

First create the directory structure that the attachments will be saved to.

Run the SQL to generate the mkdir script.

Run sh /tmp/mktarget.sh to create the directories.

SELECT 'mkdir -p /mnt/efs/content/'||

       c.spaceid||'/'||

       c.pageid||'/'||

       c.contentid AS MK

  FROM content c

 WHERE c.contenttype = 'ATTACHMENT'

   AND c.content_status = 'current'\g /tmp/mktarget.sh

                                   mk------------------------------------------------------------------------- mkdir -p /mnt/efs/content/1605635/162955322/162955320 mkdir -p /mnt/efs/content/1605635/162955322/162955326 mkdir -p /mnt/efs/content/1605635/162955322/162955324 mkdir -p /mnt/efs/content/1605635/162955368/162955366 ...

Run the SQL to generate the copy script.

Run sh /tmp/cpattach.sh to copy the attachments to their original names.

SELECT 'rsync -i /mnt/efs/aws*/confluence/shared-home/attachments/ver003/'||

       MOD(CAST(       RIGHT(CAST(c.spaceid AS VARCHAR),3)      AS INTEGER),250)||'/'||

       MOD(CAST(SUBSTR(RIGHT(CAST(c.spaceid AS VARCHAR),6),1,3) AS INTEGER),250)||'/'||

       c.spaceid||'/'||

       MOD(CAST(       RIGHT(CAST(c.pageid AS VARCHAR),3)      AS INTEGER),250)||'/'||

       MOD(CAST(SUBSTR(RIGHT(CAST(c.pageid AS VARCHAR),6),1,3) AS INTEGER),250)||'/'||

       c.pageid||'/'||

       c.contentid||'/'||

       c.version||' '||

       '/mnt/efs/content/'||

       c.spaceid||'/'||

       c.pageid||'/'||

       c.contentid||'/'||'"'||

       c.title||'"' AS MV

  FROM content c

 WHERE c.contenttype = 'ATTACHMENT'

   AND c.content_status = 'current'\g /tmp/cpattach.sh

Bibliography